# @package _global_
config:
  VERBOSE: True
  LOG_FREQUENCY: 200
  TEST_ONLY: False
  TEST_EVERY_NUM_EPOCH: 1
  TEST_MODEL: True
  SEED_VALUE: 1
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
  DATA:
    NUM_DATALOADER_WORKERS: 5
    TRAIN:
      DATA_SOURCES: [torchvision_dataset]
      LABEL_SOURCES: [torchvision_dataset]
      DATASET_NAMES: [CIFAR10]
      BATCHSIZE_PER_REPLICA: 32
      TRANSFORMS:
        - name: Resize
          size: 224
        - name: RandomHorizontalFlip
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/cifar10/
    TEST:
      DATA_SOURCES: [torchvision_dataset]
      LABEL_SOURCES: [torchvision_dataset]
      DATASET_NAMES: [CIFAR10]
      BATCHSIZE_PER_REPLICA: 32
      TRANSFORMS:
        - name: Resize
          size: 224
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/cifar10/
  METERS:
    name: accuracy_list_meter
    accuracy_list_meter:
      num_meters: 1
      topk_values: [1]
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  MODEL:
    FEATURE_EVAL_SETTINGS:
      EVAL_MODE_ON: True
      FREEZE_TRUNK_ONLY: True
      SHOULD_FLATTEN_FEATS: False
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        ["res5", ["AdaptiveAvgPool2d", [[1, 1]]]],
      ]
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 50
    HEAD:
      PARAMS: [
        ["eval_mlp", {"in_channels": 2048, "dims": [2048, 10]}],
      ]
    WEIGHTS_INIT:
      PARAMS_FILE: "specify the model weights"
      STATE_DICT_KEY_NAME: classy_state_dict
      # STATE_DICT_KEY_NAME: model_state_dict
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: apex
      GROUP_SIZE: 8
  LOSS:
    name: cross_entropy_multiple_output_single_target
    cross_entropy_multiple_output_single_target:
      ignore_index: -1
  OPTIMIZER:
      name: adamw
      # In the OSS Caffe2 benchmark, RN50 models use 1e-4 and AlexNet models 5e-4
      weight_decay: 0.0001
      momentum: 0.9
      num_epochs: 28
      nesterov: True
      regularize_bn: False
      regularize_bias: True
      param_schedulers:
        lr:
          auto_lr_scaling:
            auto_scale: true
            base_value: 0.01
            base_lr_batch_size: 512
          name: multistep
          values: [0.01, 0.001, 0.0001, 0.00001]
          milestones: [5, 7, 10]
          update_interval: epoch
  DISTRIBUTED:
    BACKEND: nccl
    NUM_NODES: 1
    NUM_PROC_PER_NODE: 8
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    DIR: "."
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 1
