# Option sets keyed by precision name.
precision:
    # The only precision that switches amp on; it also pins a loss scale.
    AMP:
        static_loss_scale: 128
        amp: true
    # FP32 and TF32 carry the same single setting: amp switched off.
    FP32:
        amp: false
    TF32:
        amp: false

# Option sets keyed by platform name.
platform:
    # All four DGX*V entries use this same option set; it is written once
    # here and merged into the other three.
    DGX1V-16G: &dgx_v_platform
        workers: 8
        prefetch: 4
        gpu_affinity: socket_unique_contiguous
    DGX1V-32G:
        <<: *dgx_v_platform
    # T4 sets only the worker count; prefetch and gpu_affinity are absent.
    T4:
        workers: 8
    DGX1V:
        <<: *dgx_v_platform
    DGX2V:
        <<: *dgx_v_platform
    # Same keys as the shared set, but with 10 workers instead of 8.
    DGXA100:
        workers: 10
        prefetch: 4
        gpu_affinity: socket_unique_contiguous

# Option sets keyed by run-mode name.
mode:
    # Training-only benchmark profile; reused by the short variant below.
    benchmark_training: &benchmark_training
        print_freq: 1
        epochs: 3
        training_only: true
        evaluate: false
        save_checkpoints: false
    # Starts from benchmark_training, cuts epochs to 1 and adds the
    # data_backend and prof keys.
    benchmark_training_short:
        <<: *benchmark_training
        epochs: 1
        data_backend: synthetic
        prof: 100
    # NOTE(review): this anchor is not aliased anywhere in the visible file.
    benchmark_inference: &benchmark_inference
        print_freq: 1
        epochs: 1
        training_only: false
        evaluate: true
        save_checkpoints: false
    # The only mode that saves checkpoints. It sets no epochs key;
    # presumably the epoch count comes from the model entry - confirm.
    convergence:
        print_freq: 20
        training_only: false
        evaluate: false
        save_checkpoints: true
    # Same values as benchmark_inference except for print_freq.
    evaluate:
        print_freq: 20
        training_only: false
        evaluate: true
        epochs: 1
        save_checkpoints: false

anchors:
    # ResNet_like params: {{{
    # Base hyper-parameters merged into every resnet_params_* variant below.
    resnet_params: &resnet_params
        label_smoothing: 0.1
        mixup: 0.2
        lr_schedule: cosine
        momentum: 0.875
        warmup: 8
        epochs: 250
        data_backend: pytorch
        num_classes: 1000
        image_size: 224
        interpolation: bilinear
    # Each variant adds optimizer_batch_size, lr and weight_decay on top of
    # the base set. In all four, lr equals optimizer_batch_size / 1000.
    # weight_decay is 6.103515625e-05 (2^-14) for the 896 and 1k variants
    # and 3.0517578125e-05 (2^-15) for the 2k and 4k variants.
    resnet_params_896: &resnet_params_896
        <<: *resnet_params
        optimizer_batch_size: 896
        lr: 0.896
        weight_decay: 6.103515625e-05
    resnet_params_1k: &resnet_params_1k
        <<: *resnet_params
        optimizer_batch_size: 1024
        lr: 1.024
        weight_decay: 6.103515625e-05
    resnet_params_2k: &resnet_params_2k
        <<: *resnet_params
        optimizer_batch_size: 2048
        lr: 2.048
        weight_decay: 3.0517578125e-05
    resnet_params_4k: &resnet_params_4k
        <<: *resnet_params
        optimizer_batch_size: 4096
        lr: 4.096
        weight_decay: 3.0517578125e-05
    # }}}
    # EfficientNet params: {{{
    # Base hyper-parameters merged into both EfficientNet variants below.
    efficientnet_params: &efficientnet_params
        optimizer: rmsprop
        rmsprop_alpha: 0.9
        rmsprop_eps: 0.01
        print_freq: 100
        label_smoothing: 0.1
        mixup: 0.2
        lr_schedule: cosine
        momentum: 0.9
        warmup: 16
        epochs: 400
        data_backend: pytorch
        augmentation: autoaugment
        num_classes: 1000
        interpolation: bicubic
    # The b0 and b4 variants share optimizer_batch_size and differ in lr,
    # weight_decay and image_size.
    #
    # weight_decay is written with a decimal point on purpose: YAML 1.1
    # loaders (PyYAML among them) only resolve exponent notation to a float
    # when the mantissa contains a ".", so a bare "1e-05" loads as a string.
    efficientnet_b0_params_4k: &efficientnet_b0_params_4k
        <<: *efficientnet_params
        optimizer_batch_size: 4096
        lr: 0.08
        weight_decay: 1.0e-05
        image_size: 224
    efficientnet_b4_params_4k: &efficientnet_b4_params_4k
        <<: *efficientnet_params
        optimizer_batch_size: 4096
        lr: 0.16
        weight_decay: 5.0e-06
        image_size: 380
    # }}}
models:
    resnet50: # {{{
        # Entries are keyed platform -> precision. Every precision entry
        # names its arch explicitly, including the two FP32 entries that
        # merge a parameter set and otherwise only set batch_size.
        #
        # The DGX1V settings are reused by DGX1V-16G and DGX1V-32G.
        DGX1V: &RN50_DGX1V
            AMP:
                <<: *resnet_params_2k
                arch: resnet50
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *resnet_params_896
                arch: resnet50
                batch_size: 112
        DGX1V-16G:
            <<: *RN50_DGX1V
        DGX1V-32G:
            <<: *RN50_DGX1V
        # Both precisions use the 4k parameter set; only AMP sets
        # memory_format.
        DGX2V:
            AMP:
                <<: *resnet_params_4k
                arch: resnet50
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *resnet_params_4k
                arch: resnet50
                batch_size: 256
        # This platform lists TF32 in place of FP32.
        DGXA100:
            AMP:
                <<: *resnet_params_2k
                arch: resnet50
                batch_size: 256
                memory_format: nhwc
            TF32:
                <<: *resnet_params_2k
                arch: resnet50
                batch_size: 256
        T4:
            AMP:
                <<: *resnet_params_2k
                arch: resnet50
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *resnet_params_2k
                arch: resnet50
                batch_size: 128
    # }}}
    resnext101-32x4d: # {{{
        # Entries are keyed platform -> precision. The DGX1V settings are
        # reused unchanged by DGX1V-16G, DGX1V-32G and T4.
        DGX1V: &RNXT_DGX1V
            AMP:
                <<: *resnet_params_1k
                arch: resnext101-32x4d
                batch_size: 128
                memory_format: nhwc
            FP32:
                <<: *resnet_params_1k
                arch: resnext101-32x4d
                batch_size: 64
        DGX1V-16G:
            <<: *RNXT_DGX1V
        DGX1V-32G:
            <<: *RNXT_DGX1V
        # This platform lists TF32 in place of FP32; both of its precisions
        # use batch size 128 and only AMP sets memory_format.
        DGXA100:
            AMP:
                <<: *resnet_params_1k
                arch: resnext101-32x4d
                batch_size: 128
                memory_format: nhwc
            TF32:
                <<: *resnet_params_1k
                arch: resnext101-32x4d
                batch_size: 128
        T4:
            <<: *RNXT_DGX1V
    # }}}
    se-resnext101-32x4d: # {{{
        # Entries are keyed platform -> precision. The DGX1V settings are
        # reused by DGX1V-16G and DGX1V-32G.
        DGX1V: &SERNXT_DGX1V
            # The only entry of this model on the 896 parameter set.
            AMP:
                <<: *resnet_params_896
                arch: se-resnext101-32x4d
                batch_size: 112
                memory_format: nhwc
            # Anchored separately because T4 uses the same FP32 settings.
            FP32: &SERNXT_FP32_1K
                <<: *resnet_params_1k
                arch: se-resnext101-32x4d
                batch_size: 64
        DGX1V-16G:
            <<: *SERNXT_DGX1V
        DGX1V-32G:
            <<: *SERNXT_DGX1V
        # This platform lists TF32 in place of FP32.
        DGXA100:
            # Anchored because T4 uses the same AMP settings.
            AMP: &SERNXT_AMP_1K
                <<: *resnet_params_1k
                arch: se-resnext101-32x4d
                batch_size: 128
                memory_format: nhwc
            TF32:
                <<: *resnet_params_1k
                arch: se-resnext101-32x4d
                batch_size: 128
        # T4 combines the DGXA100 AMP entry with the DGX1V FP32 entry.
        T4:
            AMP:
                <<: *SERNXT_AMP_1K
            FP32:
                <<: *SERNXT_FP32_1K
    # }}}
    efficientnet-widese-b0: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_WIDESE_B0_T4
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 128
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 64
        DGX1V-16G:
            <<: *EFFNET_WIDESE_B0_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 128
        # This platform lists TF32 in place of FP32; both precisions use
        # batch size 256 and only AMP sets memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 256
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-widese-b0
                batch_size: 256
    # }}}
    efficientnet-b0: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_B0_T4
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 128
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 64
        DGX1V-16G:
            <<: *EFFNET_B0_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 128
        # This platform lists TF32 in place of FP32; both precisions use
        # batch size 256 and only AMP sets memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 256
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-b0
                batch_size: 256
    # }}}
    efficientnet-quant-b0: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_QUANT_B0_T4
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 128
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 64
        DGX1V-16G:
            <<: *EFFNET_QUANT_B0_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 256
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 128
        # This platform lists TF32 in place of FP32; both precisions use
        # batch size 256 and only AMP sets memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 256
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b0_params_4k
                arch: efficientnet-quant-b0
                batch_size: 256
    # }}}
    efficientnet-widese-b4: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_WIDESE_B4_T4
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 32
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 16
        DGX1V-16G:
            <<: *EFFNET_WIDESE_B4_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 64
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 32
        # This platform lists TF32 in place of FP32; AMP runs at twice the
        # TF32 batch size and is the only one to set memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 128
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-widese-b4
                batch_size: 64
    # }}}
    efficientnet-b4: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_B4_T4
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 32
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 16
        DGX1V-16G:
            <<: *EFFNET_B4_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 64
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 32
        # This platform lists TF32 in place of FP32; AMP runs at twice the
        # TF32 batch size and is the only one to set memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 128
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-b4
                batch_size: 64
    # }}}
    efficientnet-quant-b4: # {{{
        # Entries are keyed platform -> precision. T4 and DGX1V-16G use
        # identical settings, so they are written once and merged.
        T4: &EFFNET_QUANT_B4_T4
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 32
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 16
        DGX1V-16G:
            <<: *EFFNET_QUANT_B4_T4
        # Batch sizes are twice the T4 / DGX1V-16G values.
        DGX1V-32G:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 64
                memory_format: nhwc
            FP32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 32
        # This platform lists TF32 in place of FP32; AMP runs at twice the
        # TF32 batch size and is the only one to set memory_format.
        DGXA100:
            AMP:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 128
                memory_format: nhwc
            TF32:
                <<: *efficientnet_b4_params_4k
                arch: efficientnet-quant-b4
                batch_size: 64
    # }}}
