#
# Dataset Specification
#
dataset:
  # The root directory of the dataset. This can be a local
  # file system directory, an S3 bucket, or a GCP bucket.
  root_path: https://masterful-public.s3.us-west-1.amazonaws.com/datasets/yymnist

  # The names of the splits to use in training. Each split
  # points to a CSV file of the same name in the "root_path",
  # such as train.csv for the train split. Each split can be
  # referenced below in the "training" and "evaluation" sections.
  # Splits defined here can be either labeled or unlabeled.
  splits: [train, test, unlabeled]

  # OPTIONAL: The name of the label map file. The label map file is a
  # CSV file where the first entry in each row is the integer label and
  # the second entry is the human-readable string class name. The label
  # map file is used to replace the class IDs in the evaluation metrics,
  # for easier reading. If it does not exist, then the class IDs will be
  # used. The label map file must end in ".csv" and be located at
  # '<root_path>/<label_map>.csv'.
  #label_map: label_map

  # Supported label formats:
  #   classification_csv: [default]
  #   detection_csv: comma-delimited; each row is:
  #     relative image path, xmin, ymin, xmax, ymax, class_id, ...
  #     (illustrative example rows follow this section)
  label_format: detection_csv

  # OPTIONAL: True if an optimized version of the dataset should be saved
  # locally, False otherwise. Optimizing the dataset locally adds a small,
  # one-time processing cost to convert the raw dataset into the optimized
  # version, but doing so significantly improves training times. The
  # conversion only happens once; subsequent training runs will use the
  # optimized version of the dataset.
  optimize: True
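
# For illustration only: with "label_format: detection_csv", each split file
# (for example, a hypothetical train.csv) would contain rows of a relative
# image path followed by one or more xmin, ymin, xmax, ymax, class_id groups.
# The paths and box values below are made up:
#
#   images/000001.jpg,24,31,88,95,3
#   images/000002.jpg,10,12,60,70,1,130,140,200,210,7
#
# A matching label_map.csv would map each integer class ID to a readable
# name (names below are hypothetical):
#
#   0,zero
#   1,one
#   2,two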

#
# Model Specification
#
model:
  # The name of the architecture to use. The model
  # returned at the end of training will be based on the
  # architecture specified below, and will be ready to use
  # for inference. The model will include all preprocessing
  # and standardization, and will expect single, unresized,
  # 8-bit, 3-channel RGB (uint8) images with pixel values
  # in the range [0, 255].
  architecture: ssd_mobilenet_v2

  # The number of classes in the training dataset and model predictions.
  # For binary_classification, this should be set to 1.
  num_classes: 10

  # The input shape to use for training. All data will be transformed
  # using an aspect-ratio preserving resize to this shape, which is
  # in HWC format. Note that larger input shapes take more memory
  # and longer to train, but preserve the most detail. Smaller
  # input shapes train faster, but could lose useful detail
  # in the image features. The input shape is specified as
  # [height, width, num_channels]. num_channels must be 3.
  input_shape: [416,416,3]

#
# Training Specification
#
training:
  # The task to perform. Currently, the trainer supports the
  # following tasks:
  #   classification - Multi-class classification task.
  #   binary_classification - Binary classification task.
  #   multilabel_classification - Multi-class, multi-label classification task.
  #   detection - Object detection task.
  task: detection

  # The dataset split to use for training. This must be a
  # labeled split.
  training_split: train

  # OPTIONAL: The dataset split to use for validation. If no split
  # is set here, then a validation split will be created automatically
  # from the training dataset split.
  #validation_split: validation

  # OPTIONAL: The unlabeled split to use for training.
  unlabeled_split: unlabeled

#
# Output Specification
#
output:
  # A list of output formats for the trained model.
  # Supported output formats are:
  #   saved_model - TensorFlow SavedModel format (https://www.tensorflow.org/guide/saved_model)
  #   onnx - Open Neural Network Exchange model format (https://onnx.ai/)
  # (A reference sketch for loading the exported SavedModel appears at the
  # end of this file.)
  formats: [saved_model, onnx]

  # The path to save the output into.
  path: ~/model_output

#
# Evaluation Specification
#
evaluation:
  # The dataset split to use for evaluation. This should be a
  # dataset split that is not used in training, otherwise your
  # evaluation metrics will not be representative of your model's
  # generalization performance.
  split: test
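
# For reference only, not part of the configuration: a minimal sketch of
# loading the exported SavedModel for inference with TensorFlow 2.x. The
# directory layout under the output path, the "serving_default" signature
# name, the "inputs" argument name, and the leading batch dimension are
# assumptions about the export; inspect the exported model (for example
# with saved_model_cli) before relying on them.
#
#   import os
#   import tensorflow as tf
#
#   # Load the exported model (subdirectory name is an assumption).
#   model = tf.saved_model.load(os.path.expanduser("~/model_output/saved_model"))
#   infer = model.signatures["serving_default"]   # assumed signature name
#
#   # The trained model expects a single, unresized, 3-channel RGB uint8 image.
#   image = tf.io.decode_image(tf.io.read_file("digit.png"), channels=3)  # hypothetical file
#   outputs = infer(inputs=tf.expand_dims(image, axis=0))  # "inputs" name is assumed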