#
# Dataset Specification
#
dataset:
  # The root directory of the dataset. This can be a local
  # file system directory, an S3 bucket, or a GCP bucket.
  root_path: https://masterful-public.s3.us-west-1.amazonaws.com/datasets/yymnist

  # The names of the splits to use in training. Each split
  # points to a CSV file of the same name in the "root_path",
  # such as train.csv for the train split. Each split can be
  # referenced below in the "training" and "evaluation" sections.
  # Splits defined here can be either labeled or unlabeled.
  splits: [train, test, unlabeled]

  # OPTIONAL: The name of the label map file. The label map file is a
  # CSV file where the first entry in each row is the integer label and
  # the second entry is the human-readable string class name. The label
  # map file is used to replace the class IDs in the evaluation metrics,
  # for easier reading. If it does not exist, then the class IDs will be
  # used. The label map file must end in ".csv" and be located at
  # '<root_path>/<label_map>.csv'.
  #label_map: label_map

  # Supported label formats:
  #   classification_csv: [default]
  #   detection_csv: comma-delimited; each row is:
  #     relative image path, xmin, ymin, xmax, ymax, class_id, ...
  #     (illustrative example rows follow this section)
  label_format: detection_csv

  # OPTIONAL: True if an optimized version of the dataset should be saved
  # locally, False otherwise. Optimizing the dataset locally adds a small,
  # one-time processing cost to convert the raw dataset into the optimized
  # version, but doing so significantly improves training times. The
  # conversion only happens once; subsequent training runs will use the
  # optimized version of the dataset.
  optimize: True
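
# For illustration only: with "label_format: detection_csv", each split file
# (for example, a hypothetical train.csv) would contain rows of a relative
# image path followed by one or more xmin, ymin, xmax, ymax, class_id groups.
# The paths and box values below are made up:
#
#   images/000001.jpg,24,31,88,95,3
#   images/000002.jpg,10,12,60,70,1,130,140,200,210,7
#
# A matching label_map.csv would map each integer class ID to a readable
# name (names below are hypothetical):
#
#   0,zero
#   1,one
#   2,two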

#
# Model Specification
#
model:
  # The name of the architecture to use. The model
  # returned at the end of training will be based on the
  # architecture specified below, and will be ready to use
  # for inference. The model will include all preprocessing
  # and standardization, and will expect single, unresized,
  # 8-bit, 3-channel RGB (uint8) images with pixel values
  # in the range [0, 255].
  architecture: ssd_mobilenet_v2

  # The number of classes in the training dataset and model predictions.
  # For binary_classification, this should be set to 1.
  num_classes: 10

  # The input shape to use for training. All data will be transformed
  # using an aspect-ratio preserving resize to this shape, which is
  # in HWC format. Note that larger input shapes take more memory
  # and longer to train, but preserve the most detail. Smaller
  # input shapes train faster, but could lose useful detail
  # in the image features. The input shape is specified as
  # [height, width, num_channels]. num_channels must be 3.
  input_shape: [416,416,3]

#
# Training Specification
#
training:
  # The task to perform. Currently, the trainer supports the
  # following tasks:
  #   classification - Multi-class classification task.
  #   binary_classification - Binary classification task.
  #   multilabel_classification - Multi-class, multi-label classification task.
  #   detection - Object detection task.
  task: detection

  # The dataset split to use for training. This must be a
  # labeled split.
  training_split: train

  # OPTIONAL: The dataset split to use for validation. If no split
  # is set here, then a validation split will be created automatically
  # from the training dataset split.
  #validation_split: validation

  # OPTIONAL: The unlabeled split to use for training.
  unlabeled_split: unlabeled

#
# Output Specification
#
output:
  # A list of output formats for the trained model.
  # Supported output formats are:
  #   saved_model - TensorFlow SavedModel format (https://www.tensorflow.org/guide/saved_model)
  #   onnx - Open Neural Network Exchange model format (https://onnx.ai/)
  # (A reference sketch for loading the exported SavedModel appears at the
  # end of this file.)
  formats: [saved_model, onnx]

  # The path to save the output into.
  path: ~/model_output

#
# Evaluation Specification
#
evaluation:
  # The dataset split to use for evaluation. This should be a
  # dataset split that is not used in training, otherwise your
  # evaluation metrics will not be representative of your model's
  # generalization performance.
  split: test
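
# For reference only, not part of the configuration: a minimal sketch of
# loading the exported SavedModel for inference with TensorFlow 2.x. The
# directory layout under the output path, the "serving_default" signature
# name, the "inputs" argument name, and the leading batch dimension are
# assumptions about the export; inspect the exported model (for example
# with saved_model_cli) before relying on them.
#
#   import os
#   import tensorflow as tf
#
#   # Load the exported model (subdirectory name is an assumption).
#   model = tf.saved_model.load(os.path.expanduser("~/model_output/saved_model"))
#   infer = model.signatures["serving_default"]   # assumed signature name
#
#   # The trained model expects a single, unresized, 3-channel RGB uint8 image.
#   image = tf.io.decode_image(tf.io.read_file("digit.png"), channels=3)  # hypothetical file
#   outputs = infer(inputs=tf.expand_dims(image, axis=0))  # "inputs" name is assumed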