# Hydra defaults list: the `bmd` option of the `datasets` group is placed under the `dataset`
# key; `_self_` comes last so the values in this file take precedence over it.
defaults:
  - datasets@dataset: bmd
  - _self_

# Mandatory value (`???`): path to the location of a 'workspace' directory that has
# subdirectories 'code' and 'data'.
location: ???
dataset:
  location: ${location}
# NOTE(review): this value ends in '/', and hydra.run.dir below adds another '/', so the
# resolved run directory contains '//'.
output_dir: ${location}/workspace/code/hydra_outputs/
hydra:
  run:
    # One directory per run: <date>/<time of day> below output_dir.
    dir: ${output_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S}

# Mandatory value (`???`): name of the folder where the models' features will be saved.
models_folder: ???
# Optional: compare results between different runs (saved in separate models' folders).
other_folders_compare: null
# Options: 'ModelType', 'ArchType', 'ModelTypeFolder'.
comparison_variable: 'ModelType'
# Number of permutations for the permutation test.
n_permutations: 1000
# 'cpu' or 'cuda'.
device: cuda
models:
  # Copy model entries from configs.models here, as a list.
  # The default list is all 92 models, i.e. every model except the 7 other-action-dataset models.
  # rsa_skips entries are written with '_' as the separator and never with '.' (e.g. 'classifier_0').
  # AlexNet — 'Standard'-set CNN entry; rsa_skips holds '_'-separated layer names.
  - name: 'AlexNet'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 61
    set: 'Standard'
    rsa_skips:
      - 'classifier_0'
      - 'classifier_1'
      - 'classifier_2'
      - 'classifier_3'
      - 'classifier_4'
      - 'classifier_5'
      - 'avgpool'

  # Densenet161 — 'Standard'-set CNN entry; rsa_skips holds '_'-separated layer names.
  - name: 'Densenet161'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 28
    set: 'Standard'
    rsa_skips:
      - 'features_norm0'
      - 'features_norm5'
      - 'features_pool0'
      - 'features_relu0'

  # Densenet201 — 'Standard'-set CNN entry; rsa_skips holds '_'-separated layer names.
  - name: 'Densenet201'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 20
    set: 'Standard'
    rsa_skips:
      - 'features_norm0'
      - 'features_norm5'
      - 'features_pool0'
      - 'features_relu0'

  # efficientnet_b3 — 'Standard'-set CNN entry; rsa_skips holds '_'-separated layer names.
  - name: 'efficientnet_b3'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 12
    set: 'Standard'
    rsa_skips:
      - 'avgpool'
      - 'classifier_0'

  # efficientnet_b6 — 'Standard'-set CNN entry; rsa_skips holds '_'-separated layer names.
  - name: 'efficientnet_b6'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 43
    set: 'Standard'
    rsa_skips:
      - 'avgpool'
      - 'classifier_0'

  # regnet_x_16gf — 'Standard'-set CNN entry with a single rsa_skips name.
  - name: 'regnet_x_16gf'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 54
    set: 'Standard'
    rsa_skips:
      - 'avgpool'

  # regnet_y_8gf — 'Standard'-set CNN entry with a single rsa_skips name.
  - name: 'regnet_y_8gf'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 39
    set: 'Standard'
    rsa_skips:
      - 'avgpool'

  # ResNet34 — 'Standard'-set CNN entry; rsa_skips names the stem and pooling layers listed below.
  - name: 'ResNet34'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 21
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # ResNet50 — 'Standard'-set CNN entry; rsa_skips names the stem and pooling layers listed below.
  - name: 'ResNet50'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 25
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # ResNet101 — 'Standard'-set CNN entry; rsa_skips names the stem and pooling layers listed below.
  - name: 'ResNet101'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 44
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # ResNet152 — 'Standard'-set CNN entry; rsa_skips names the stem and pooling layers listed below.
  - name: 'ResNet152'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 60
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # resnext50_32x4d — 'Standard'-set CNN entry; same rsa_skips names as the ResNet entries.
  - name: 'resnext50_32x4d'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 25
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # resnext101_32x8d — 'Standard'-set CNN entry; same rsa_skips names as the ResNet entries.
  - name: 'resnext101_32x8d'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 88
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # VGG11 — 'Standard'-set CNN entry. The long rsa_skips list stays in flow form for
  # compactness: classifier/avgpool names first, then the 'features_<index>' names.
  - name: 'VGG11'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 132
    set: 'Standard'
    rsa_skips: ['classifier_0', 'classifier_1', 'classifier_2', 'classifier_3', 'classifier_4',
                'classifier_5', 'avgpool',
                'features_1', 'features_2', 'features_3', 'features_4', 'features_5', 'features_7',
                'features_8', 'features_9', 'features_10', 'features_12', 'features_13',
                'features_14', 'features_15', 'features_17', 'features_19', 'features_20']


  # VGG11_bn — 'Standard'-set CNN entry. The long rsa_skips list stays in flow form for
  # compactness: classifier/avgpool names first, then the 'features_<index>' names.
  - name: 'VGG11_bn'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 132
    set: 'Standard'
    rsa_skips: ['classifier_0', 'classifier_1', 'classifier_2', 'classifier_3', 'classifier_4',
                'classifier_5', 'avgpool',
                'features_1', 'features_2', 'features_3', 'features_4', 'features_5', 'features_6',
                'features_7', 'features_9', 'features_10', 'features_11', 'features_12',
                'features_13', 'features_14', 'features_16', 'features_17', 'features_18',
                'features_19', 'features_20', 'features_21', 'features_23', 'features_24',
                'features_26', 'features_27', 'features_28']

  # VGG19 — 'Standard'-set CNN entry. The long rsa_skips list stays in flow form for
  # compactness: classifier/avgpool names first, then the 'features_<index>' names.
  - name: 'VGG19'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 143
    set: 'Standard'
    rsa_skips: ['classifier_0', 'classifier_1', 'classifier_2', 'classifier_3', 'classifier_4',
                'classifier_5', 'avgpool',
                'features_1', 'features_2', 'features_3', 'features_4', 'features_6', 'features_7',
                'features_8', 'features_9', 'features_11', 'features_12', 'features_13',
                'features_14', 'features_15', 'features_16', 'features_17', 'features_18',
                'features_20', 'features_21', 'features_22', 'features_23', 'features_24',
                'features_25', 'features_26', 'features_27', 'features_29', 'features_30',
                'features_31', 'features_32', 'features_33', 'features_35', 'features_36']

  # VGG19_bn — 'Standard'-set CNN entry. The long rsa_skips list stays in flow form for
  # compactness: classifier/avgpool names first, then the 'features_<index>' names.
  - name: 'VGG19_bn'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 143
    set: 'Standard'
    rsa_skips: ['classifier_0', 'classifier_1', 'classifier_2', 'classifier_3', 'classifier_4',
                'classifier_5', 'avgpool',
                'features_1', 'features_2', 'features_3', 'features_4', 'features_5', 'features_6',
                'features_8', 'features_9', 'features_10', 'features_11', 'features_12',
                'features_13', 'features_15', 'features_16', 'features_17', 'features_18',
                'features_19', 'features_21', 'features_22', 'features_23', 'features_24',
                'features_25', 'features_26', 'features_28', 'features_29', 'features_30',
                'features_31', 'features_32', 'features_34', 'features_35', 'features_36',
                'features_37', 'features_38', 'features_39', 'features_41', 'features_42',
                'features_43', 'features_44', 'features_45', 'features_47', 'features_48',
                'features_50', 'features_51', 'features_52']

  # wide_resnet50_2 — 'Standard'-set CNN entry; same rsa_skips names as the ResNet entries.
  - name: 'wide_resnet50_2'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 68
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # wide_resnet101_2 — 'Standard'-set CNN entry; same rsa_skips names as the ResNet entries.
  - name: 'wide_resnet101_2'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 126
    set: 'Standard'
    rsa_skips:
      - 'bn1'
      - 'relu'
      - 'maxpool'
      - 'avgpool'

  # inception_v3 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'inception_v3'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 23
    set: 'Timm'

  # inception_v4 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'inception_v4'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 42
    set: 'Timm'

  # repvgg_a2 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'repvgg_a2'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 28
    set: 'Timm'

  # repvgg_b2 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'repvgg_b2'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 89
    set: 'Timm'

  # seresnet50 — 'Timm'-set CNN entry; rsa_skips holds '_'-free stem layer names.
  - name: 'seresnet50'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 28
    set: 'Timm'
    rsa_skips:
      - 'maxpool'
      - 'bn1'
      - 'act1'

  # seresnext50_32x4d — 'Timm'-set CNN entry; same rsa_skips names as seresnet50.
  - name: 'seresnext50_32x4d'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 27
    set: 'Timm'
    rsa_skips:
      - 'maxpool'
      - 'bn1'
      - 'act1'

  # xception41 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'xception41'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 27
    set: 'Timm'

  # xception71 — 'Timm'-set CNN entry; it deliberately defines no rsa_skips key.
  - name: 'xception71'
    type: 'image-in1k'
    archtype: 'CNNs'
    params: 42
    set: 'Timm'

  # cait_s24_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated layer names.
  - name: 'cait_s24_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 46
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'

  # cait_xxs24_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated layer names.
  - name: 'cait_xxs24_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 12
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'

  # convit_small — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated layer names.
  - name: 'convit_small'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 27
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'

  # convit_base — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated layer names.
  - name: 'convit_base'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 86
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'

  # deit_small_patch16_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated
  # layer names.
  - name: 'deit_small_patch16_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 22
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'
      - 'pre_logits'

  # deit_base_patch16_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated
  # layer names.
  - name: 'deit_base_patch16_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 86
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'
      - 'pre_logits'

  # swin_tiny_patch4_window7_224 — 'Timm'-set Transformer entry. It gives an explicit `layers`
  # list (dotted names) and leaves rsa_skips empty.
  - name: 'swin_tiny_patch4_window7_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 28
    set: 'Timm'
    layers:
      - 'layers.0'
      - 'layers.1'
      - 'layers.2.blocks.1'
      - 'layers.2.blocks.3'
      - 'layers.2.blocks.5'
      - 'layers.2.blocks.7'
      - 'layers.2.blocks.9'
      - 'layers.2.blocks.11'
      - 'layers.2.blocks.13'
      - 'layers.2.blocks.15'
      - 'layers.2.blocks.17'
      - 'layers.3'
      - 'head'
    rsa_skips: []

  # swin_small_patch4_window7_224 — 'Timm'-set Transformer entry. It gives an explicit `layers`
  # list (dotted names) and leaves rsa_skips empty.
  - name: 'swin_small_patch4_window7_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 49
    set: 'Timm'
    layers:
      - 'layers.0'
      - 'layers.1'
      - 'layers.2.blocks.1'
      - 'layers.2.blocks.3'
      - 'layers.2.blocks.5'
      - 'layers.2.blocks.7'
      - 'layers.2.blocks.9'
      - 'layers.2.blocks.11'
      - 'layers.2.blocks.13'
      - 'layers.2.blocks.15'
      - 'layers.2.blocks.17'
      - 'layers.3'
      - 'head'
    rsa_skips: []

  # swin_base_patch4_window7_224 — 'Timm'-set Transformer entry. It gives an explicit `layers`
  # list (dotted names) and leaves rsa_skips empty.
  - name: 'swin_base_patch4_window7_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 87
    set: 'Timm'
    layers:
      - 'layers.0'
      - 'layers.1'
      - 'layers.2.blocks.1'
      - 'layers.2.blocks.3'
      - 'layers.2.blocks.5'
      - 'layers.2.blocks.7'
      - 'layers.2.blocks.9'
      - 'layers.2.blocks.11'
      - 'layers.2.blocks.13'
      - 'layers.2.blocks.15'
      - 'layers.2.blocks.17'
      - 'layers.3'
      - 'head'
    rsa_skips: []

  # twins_pcpvt_base — 'Timm'-set Transformer entry. It gives an explicit `layers` list
  # (dotted names) and leaves rsa_skips empty.
  - name: 'twins_pcpvt_base'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 43
    set: 'Timm'
    layers:
      - 'blocks.0.1'
      - 'blocks.1.1'
      - 'blocks.2.1'
      - 'blocks.2.3'
      - 'blocks.2.5'
      - 'blocks.2.7'
      - 'blocks.2.9'
      - 'blocks.2.11'
      - 'blocks.2.13'
      - 'blocks.2.15'
      - 'blocks.2.17'
      - 'blocks.3.1'
      - 'head'
    rsa_skips: []

  # vit_small_patch16_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated
  # layer names.
  - name: 'vit_small_patch16_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 22
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'
      - 'pre_logits'

  # vit_base_patch16_224 — 'Timm'-set Transformer entry; rsa_skips holds '_'-separated
  # layer names.
  - name: 'vit_base_patch16_224'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 86
    set: 'Timm'
    rsa_skips:
      - 'patch_embed_norm'
      - 'patch_embed_proj'
      - 'pos_drop'
      - 'norm'
      - 'pre_logits'

  # mvitv2_small_in1k — Transformer entry whose cfg node targets repralign's timm_loader with
  # model_name 'mvitv2_small.fb_in1k'. `set` is null and netset_fallback is 'Timm'.
  - name: 'mvitv2_small_in1k'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 34.9
    set: null  # leave null (None in Python)
    netset_fallback: 'Timm'
    cfg:
      _target_: repralign.models.loading.timm_loader
      model_name: 'mvitv2_small.fb_in1k'
    layers:
      - 'stages.0'
      - 'stages.1'
      - 'stages.2.blocks.1'
      - 'stages.2.blocks.3'
      - 'stages.2.blocks.5'
      - 'stages.2.blocks.7'
      - 'stages.2.blocks.9'
      - 'stages.3'
      - 'head'
    rsa_skips: []

  # mvitv2_base_in1k — Transformer entry whose cfg node targets repralign's timm_loader with
  # model_name 'mvitv2_base.fb_in1k'. `set` is null and netset_fallback is 'Timm'.
  - name: 'mvitv2_base_in1k'
    type: 'image-in1k'
    archtype: 'Transformers'
    params: 51.5
    set: null  # leave null (None in Python)
    netset_fallback: 'Timm'
    cfg:
      _target_: repralign.models.loading.timm_loader
      model_name: 'mvitv2_base.fb_in1k'
    layers:
      - 'stages.0'
      - 'stages.1'
      - 'stages.2.blocks.1'
      - 'stages.2.blocks.3'
      - 'stages.2.blocks.5'
      - 'stages.2.blocks.7'
      - 'stages.2.blocks.9'
      - 'stages.2.blocks.11'
      - 'stages.2.blocks.13'
      - 'stages.2.blocks.15'
      - 'stages.3'
      - 'head'
    rsa_skips: []

  # c2d_r50_nopool_mma — mmaction2 entry (cfg targets repralign's mmaction_loader). It gives an
  # explicit `layers` list (dotted names) and leaves rsa_skips empty.
  - name: 'c2d_r50_nopool_mma'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 24.3
    flops: 33
    accuracy: 73.44
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 8
      frame_interval: 8
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/c2d/c2d_r50-in1k-pre-nopool_8xb32-8x8x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/c2d/c2d_r50-in1k-pre-nopool_8xb32-8x8x1-100e_kinetics400-rgb_20221027-e0227b22.pth
    layers:
      - 'backbone.conv1'
      - 'backbone.layer1'
      - 'backbone.layer2'
      - 'backbone.layer3'
      - 'backbone.layer4'
      - 'cls_head'
    rsa_skips: []

  # c2d_r101_nopool_mma — mmaction2 entry (cfg targets repralign's mmaction_loader); no `layers`
  # key, rsa_skips holds '_'-separated layer names.
  - name: 'c2d_r101_nopool_mma'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 43.3
    flops: 63
    accuracy: 74.97
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 8
      frame_interval: 8
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/c2d/c2d_r101-in1k-pre-nopool_8xb32-8x8x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/c2d/c2d_r101-in1k-pre-nopool_8xb32-8x8x1-100e_kinetics400-rgb_20221027-557bd8bc.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # c2d_r50_pool8_mma — mmaction2 entry (cfg targets repralign's mmaction_loader); uses
  # time_as 'image_agg' and format_shape 'NCTHW'.
  - name: 'c2d_r50_pool8_mma'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 24.3
    flops: 19
    accuracy: 73.89
    time_as: 'image_agg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 8
      frame_interval: 8
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCTHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/c2d/c2d_r50-in1k-pre_8xb32-8x8x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/c2d/c2d_r50-in1k-pre_8xb32-8x8x1-100e_kinetics400-rgb_20221027-3ca304fa.pth
    rsa_skips:
      - 'backbone_maxpool3d_1'
      - 'backbone_maxpool3d_2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # c2d_r50_pool16_mma — mmaction2 entry (cfg targets repralign's mmaction_loader); uses
  # time_as 'image_agg', format_shape 'NCTHW', clip_len 16 and frame_interval 4.
  - name: 'c2d_r50_pool16_mma'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 24.3
    flops: 39
    accuracy: 74.97
    time_as: 'image_agg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCTHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/c2d/c2d_r50-in1k-pre_8xb32-16x4x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/c2d/c2d_r50-in1k-pre_8xb32-16x4x1-100e_kinetics400-rgb_20221027-5f382a43.pth
    rsa_skips:
      - 'backbone_maxpool3d_1'
      - 'backbone_maxpool3d_2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # TSN_r50_8 — mmaction2 entry (cfg targets repralign's mmaction_loader); single-frame clips
  # (clip_len 1), resized to 256 and center-cropped to 224.
  - name: 'TSN_r50_8'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 24.33
    flops: 102.7
    accuracy: 74.12
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb_20220906-2692d16c.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSN_r101_8 — mmaction2 entry (cfg targets repralign's mmaction_loader); single-frame clips
  # (clip_len 1), resized to 256 and center-cropped to 224.
  - name: 'TSN_r101_8'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 43.32
    flops: 195.8
    accuracy: 75.89
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsn/tsn_imagenet-pretrained-r101_8xb32-1x1x8-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsn/tsn_imagenet-pretrained-r101_8xb32-1x1x8-100e_kinetics400-rgb_20220906-23cff032.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSN_d161_3 — mmaction2 entry (cfg targets repralign's mmaction_loader) using a config from
  # tsn/custom_backbones; rsa_skips holds '_'-separated layer names.
  - name: 'TSN_d161_3'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 27.36
    flops: 194.6
    accuracy: 72.07
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsn/custom_backbones/tsn_imagenet-pretrained-dense161_8xb32-1x1x3-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsn/tsn_imagenet-pretrained-dense161_8xb32-1x1x3-100e_kinetics400-rgb_20220906-5f4c0daf.pth
    rsa_skips:
      - 'backbone_features_norm0'
      - 'backbone_features_norm5'
      - 'backbone_features_pool0'
      - 'backbone_features_relu0'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSN_mobones4_8 — mmaction2 entry (cfg targets repralign's mmaction_loader) using a config
  # from tsn/custom_backbones; rsa_skips lists only cls_head layer names.
  - name: 'TSN_mobones4_8'
    type: 'image-k400'
    archtype: 'CNNs'
    params: 13.72
    flops: 76
    accuracy: 73.65
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsn/custom_backbones/tsn_imagenet-pretrained-mobileone-s4_8xb32-1x1x8-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsn/tsn_imagenet-pretrained-mobileone-s4_8xb32-1x1x8-100e_kinetics400-rgb_20230825-2da3c1f7.pth
    rsa_skips:
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSN_swin_8 — mmaction2 Transformer entry (cfg targets repralign's mmaction_loader) using a
  # config from tsn/custom_backbones; rsa_skips holds '_'-separated layer names.
  - name: 'TSN_swin_8'
    type: 'image-k400'
    archtype: 'Transformers'
    params: 87.15
    flops: 386.7
    accuracy: 79.22
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsn/custom_backbones/tsn_imagenet-pretrained-swin-transformer_32xb8-1x1x8-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsn/tsn_imagenet-pretrained-swin-transformer_32xb8-1x1x8-50e_kinetics400-rgb_20230530-428f0064.pth
    rsa_skips:
      - 'backbone_patch_embed_proj'
      - 'backbone_patch_embed_norm'
      - 'backbone_avgpool'
      - 'backbone_norm'
      - 'backbone_pos_drop'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # timesformer_spaceOnly — mmaction2 Transformer entry (cfg targets repralign's
  # mmaction_loader); the preprocessor node sets no format_shape.
  - name: 'timesformer_spaceOnly'
    type: 'image-k400'
    archtype: 'Transformers'
    params: 86.11
    flops: 141
    accuracy: 76.93
    time_as: 'score_avg'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 8
      frame_interval: 32
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_drop_after_pos'
      - 'backbone_norm'

  # ircsn_r152 — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader);
  # pre is 'nopre' and the preprocessor node sets no format_shape.
  - name: 'ircsn_r152'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 29.7
    flops: 97.6
    accuracy: 76.53
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ircsn_r152_32x2x1-180e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/vmz_ircsn_from_scratch_r152_32x2x1_180e_kinetics400_rgb_20210617-5c933ae1.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # ircsn_bnfrozen_r152_ig65m — mmaction2 'video-k400' entry (cfg targets repralign's
  # mmaction_loader); pre is 'prevideo' and the preprocessor node sets no format_shape.
  - name: 'ircsn_bnfrozen_r152_ig65m'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 29.7
    flops: 97.63
    accuracy: 82.84
    time_as: 'input_dim_local'
    pre: 'prevideo'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ircsn_ig65m-pretrained-r152-bnfrozen_8xb12-32x2x1-58e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/ircsn_ig65m-pretrained-r152-bnfrozen_8xb12-32x2x1-58e_kinetics400-rgb_20220811-7d1dacde.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # ircsn_bnfrozen_r50_ig65m — mmaction2 'video-k400' entry (cfg targets repralign's
  # mmaction_loader); pre is 'prevideo' and the preprocessor node sets no format_shape.
  - name: 'ircsn_bnfrozen_r50_ig65m'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 13.13
    flops: 55.90
    accuracy: 79.44
    time_as: 'input_dim_local'
    pre: 'prevideo'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ircsn_ig65m-pretrained-r50-bnfrozen_8xb12-32x2x1-58e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/ircsn_ig65m-pretrained-r50-bnfrozen_8xb12-32x2x1-58e_kinetics400-rgb_20220811-44395bae.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # ircsn_r152_ig65m — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader);
  # pre is 'prevideo' and the preprocessor node sets no format_shape.
  - name: 'ircsn_r152_ig65m'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 29.7
    flops: 97.63
    accuracy: 82.87
    time_as: 'input_dim_local'
    pre: 'prevideo'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ircsn_ig65m-pretrained-r152_8xb12-32x2x1-58e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/ircsn_ig65m-pretrained-r152_8xb12-32x2x1-58e_kinetics400-rgb_20220811-c7a3cc5b.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # ipcsn_bnfrozen_r152_ig65m — mmaction2 'video-k400' entry (cfg targets repralign's
  # mmaction_loader); pre is 'prevideo' and the preprocessor node sets no format_shape.
  - name: 'ipcsn_bnfrozen_r152_ig65m'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 33.02
    flops: 109.9
    accuracy: 82.68
    time_as: 'input_dim_local'
    pre: 'prevideo'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ipcsn_ig65m-pretrained-r152-bnfrozen_32x2x1-58e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/vmz_ipcsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb_20210617-c3be9793.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # ipcsn_r152 — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader);
  # pre is 'nopre' and the preprocessor node sets no format_shape.
  - name: 'ipcsn_r152'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 33.02
    flops: 109.9
    accuracy: 77.80
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/csn/ipcsn_r152_32x2x1-180e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/csn/vmz_ipcsn_from_scratch_r152_32x2x1_180e_kinetics400_rgb_20210617-d565828d.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # i3d_r50_mma — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader). It
  # gives an explicit `layers` list (dotted names) and leaves rsa_skips empty.
  - name: 'i3d_r50_mma'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 28.0
    flops: 43.5
    accuracy: 73.47
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/i3d/i3d_imagenet-pretrained-r50_8xb8-32x2x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/i3d/i3d_imagenet-pretrained-r50_8xb8-32x2x1-100e_kinetics400-rgb_20220812-e213c223.pth
    layers:
      - 'backbone.conv1'
      - 'backbone.layer1'
      - 'backbone.layer2'
      - 'backbone.layer3'
      - 'backbone.layer4'
      - 'cls_head'
    rsa_skips: []

  # i3d_r50_dotprod — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader);
  # time_as is 'input_dim_localglobal' and pre is 'preimage'.
  - name: 'i3d_r50_dotprod'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 35.4
    flops: 59.3
    accuracy: 74.80
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/i3d/i3d_imagenet-pretrained-r50-nl-dot-product_8xb8-32x2x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/i3d/i3d_imagenet-pretrained-r50-nl-dot-product_8xb8-32x2x1-100e_kinetics400-rgb_20220812-8e1f2148.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'backbone_pool2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # i3d_r50_embgauss — mmaction2 'video-k400' entry (cfg targets repralign's mmaction_loader);
  # time_as is 'input_dim_localglobal' and pre is 'preimage'.
  - name: 'i3d_r50_embgauss'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 35.4
    flops: 59.3
    accuracy: 74.73
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave null (None in Python)
    netset_fallback: 'Pyvideo'
    # `_partial_: true`: when passed to Hydra's instantiate, the target is returned as a partial
    # with these arguments bound instead of being called.
    extractor:
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      _partial_: true
      stage: 'head'  # one of 'backbone', 'neck', 'head'
    preprocessor:
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      _partial_: true
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/i3d/i3d_imagenet-pretrained-r50-nl-embedded-gaussian_8xb8-32x2x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/i3d/i3d_imagenet-pretrained-r50-nl-embedded-gaussian_8xb8-32x2x1-100e_kinetics400-rgb_20220812-afd8f562.pth
    rsa_skips:
      - 'backbone_maxpool'
      - 'backbone_pool2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # I3D, 'r50-nl-gaussian' config variant (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  - name: 'i3d_r50_gauss'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 31.7
    flops: 56.5
    accuracy: 73.97
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/i3d/i3d_imagenet-pretrained-r50-nl-gaussian_8xb8-32x2x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/i3d/i3d_imagenet-pretrained-r50-nl-gaussian_8xb8-32x2x1-100e_kinetics400-rgb_20220812-0c5cbf5a.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'backbone_pool2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # I3D, 'r50-heavy' config variant (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  - name: 'i3d_r50_heavy'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 33.0
    flops: 166.3
    accuracy: 76.21
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/i3d/i3d_imagenet-pretrained-r50-heavy_8xb8-32x2x1-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/i3d/i3d_imagenet-pretrained-r50-heavy_8xb8-32x2x1-100e_kinetics400-rgb_20220812-ed501b31.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'backbone_pool2'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # R(2+1)D loaded through mmaction2 (Kinetics-400 RGB per config filename).
  # NOTE(review): the entry name says 'r50' but the config and checkpoint below are
  # the 'r2plus1d_r34' files - confirm which is intended before renaming anything.
  - name: 'r2plus1d_r50_mma'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 63.8
    flops: 213
    accuracy: 75.46
    time_as: 'input_dim_factorized'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/r2plus1d/r2plus1d_r34_8xb8-32x2x1-180e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/r2plus1d/r2plus1d_r34_8xb8-32x2x1-180e_kinetics400-rgb_20220812-4270588c.pth
    # Explicit dotted module paths; with these given, nothing is listed in rsa_skips.
    layers:
      - 'backbone.conv1'
      - 'backbone.layer1'
      - 'backbone.layer2'
      - 'backbone.layer3'
      - 'backbone.layer4'
      - 'cls_head'
    rsa_skips: []

  # SlowFast R50 loaded through mmaction2 (Kinetics-400 RGB per config filename).
  - name: 'slowfast_r50_mma'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 34.6
    flops: 66.1
    accuracy: 76.8
    time_as: 'input_dim_multires'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowfast/slowfast_r50_8xb8-8x8x1-256e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slowfast/slowfast_r50_8xb8-8x8x1-256e_kinetics400-rgb_20220818-1cb6dfc8.pth
    # Explicit dotted module paths: slow path, its lateral connections, fast path, then the head.
    layers:
      - 'backbone.slow_path.conv1'
      - 'backbone.slow_path.layer1'
      - 'backbone.slow_path.layer2'
      - 'backbone.slow_path.layer3'
      - 'backbone.slow_path.layer4'
      - 'backbone.slow_path.conv1_lateral'
      - 'backbone.slow_path.layer1_lateral'
      - 'backbone.slow_path.layer2_lateral'
      - 'backbone.slow_path.layer3_lateral'
      - 'backbone.fast_path.conv1'
      - 'backbone.fast_path.layer1'
      - 'backbone.fast_path.layer2'
      - 'backbone.fast_path.layer3'
      - 'backbone.fast_path.layer4'
      - 'cls_head'
    rsa_skips: []

  # SlowFast R101 loaded through mmaction2 (Kinetics-400 RGB per config filename).
  - name: 'slowfast_r101_mma'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 62.9
    flops: 126
    accuracy: 78.65
    time_as: 'input_dim_multires'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowfast/slowfast_r101_8xb8-8x8x1-256e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slowfast/slowfast_r101_8xb8-8x8x1-256e_kinetics400-rgb_20220818-9c0e09bd.pth
    # Explicit dotted module paths: slow path, its lateral connections, fast path, then the head.
    layers:
      - 'backbone.slow_path.conv1'
      - 'backbone.slow_path.layer1'
      - 'backbone.slow_path.layer2'
      - 'backbone.slow_path.layer3'
      - 'backbone.slow_path.layer4'
      - 'backbone.slow_path.conv1_lateral'
      - 'backbone.slow_path.layer1_lateral'
      - 'backbone.slow_path.layer2_lateral'
      - 'backbone.slow_path.layer3_lateral'
      - 'backbone.fast_path.conv1'
      - 'backbone.fast_path.layer1'
      - 'backbone.fast_path.layer2'
      - 'backbone.fast_path.layer3'
      - 'backbone.fast_path.layer4'
      - 'cls_head'
    rsa_skips: []

  # SlowOnly R50 loaded through mmaction2 (Kinetics-400 RGB per config filename).
  - name: 'slow_r50_mma'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 32.45
    flops: 54.75
    accuracy: 75.15
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 8
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # Config lives under configs/.../slowonly/, checkpoint under checkpoints/slow/.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowonly/slowonly_r50_8xb16-8x8x1-256e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slow/slowonly_r50_8xb16-8x8x1-256e_kinetics400-rgb_20220901-2132fc87.pth
    # Explicit dotted module paths; with these given, nothing is listed in rsa_skips.
    layers:
      - 'backbone.conv1'
      - 'backbone.layer1'
      - 'backbone.layer2'
      - 'backbone.layer3'
      - 'backbone.layer4'
      - 'cls_head'
    rsa_skips: []

  # SlowOnly R101 loaded through mmaction2 (Kinetics-400 RGB per config filename).
  - name: 'slow_r101'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 60.36
    flops: 112
    accuracy: 76.59
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 8
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # Config lives under configs/.../slowonly/, checkpoint under checkpoints/slow/.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowonly/slowonly_r101_8xb16-8x8x1-196e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slow/slowonly_r101_8xb16-8x8x1-196e_kinetics400-rgb_20220901-e6281431.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # SlowOnly R50, ImageNet-pretrained variant (Kinetics-400 RGB per config filename).
  - name: 'slow_r50_in1k'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 32.45
    flops: 54.75
    accuracy: 76.45
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 8
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # Config lives under configs/.../slowonly/, checkpoint under checkpoints/slow/.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowonly/slowonly_imagenet-pretrained-r50_8xb16-8x8x1-steplr-150e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slow/slowonly_imagenet-pretrained-r50_8xb16-8x8x1-steplr-150e_kinetics400-rgb_20220901-df42dc84.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # SlowOnly R50, 'in1k-pre-nl-embedded-gaussian' config variant (Kinetics-400 RGB per config filename).
  - name: 'slow_r50_in1k_embgauss'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 39.81
    flops: 96.66
    accuracy: 76.65
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 8
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # Config lives under configs/.../slowonly/, checkpoint under checkpoints/slow/.
    # NOTE(review): the checkpoint suffix '20220901-df42dc84' is identical to the one
    # used by the 'slow_r50_in1k' entry - confirm this filename matches the file on disk.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/slowonly/slowonly_r50-in1k-pre-nl-embedded-gaussian_8xb16-8x8x1-steplr-150e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/slow/slowonly_r50-in1k-pre-nl-embedded-gaussian_8xb16-8x8x1-steplr-150e_kinetics400-rgb_20220901-df42dc84.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_dropout'

  # TANet R50, ImageNet-pretrained, 'dense-1x1x8' config (Kinetics-400 RGB per config filename).
  - name: 'tanet_r50'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 25.6
    flops: 43
    accuracy: 76.22
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets dense_sampling and an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
      dense_sampling: true
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tanet/tanet_imagenet-pretrained-r50_8xb8-dense-1x1x8-100e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tanet/tanet_imagenet-pretrained-r50_8xb8-dense-1x1x8-100e_kinetics400-rgb_20220919-a34346bc.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TPN on a SlowOnly R50 backbone (Kinetics-400 RGB per config filename).
  - name: 'tpn_r50'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 32
    flops: 54
    accuracy: 74.20
    time_as: 'input_dim_multires'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 8
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tpn/tpn-slowonly_r50_8xb8-8x8x1-150e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tpn/tpn-slowonly_r50_8xb8-8x8x1-150e_kinetics400-rgb_20220913-97d0835d.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool3d'
      - 'cls_head_dropout'

  # TSM R50, ImageNet-pretrained, '1x1x8' config (Kinetics-400 RGB per config filename).
  - name: 'tsm_r50'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 23.87
    flops: 32.88
    accuracy: 73.18
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsm/tsm_imagenet-pretrained-r50_8xb16-1x1x8-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsm/tsm_imagenet-pretrained-r50_8xb16-1x1x8-50e_kinetics400-rgb_20220831-64d69186.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSM, 'r50-nl-dot-product' config variant (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  - name: 'tsm_r50_dotprod'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 31.68
    flops: 61.3
    accuracy: 74.49
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsm/tsm_imagenet-pretrained-r50-nl-dot-product_8xb16-1x1x8-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsm/tsm_imagenet-pretrained-r50-nl-dot-product_8xb16-1x1x8-50e_kinetics400-rgb_20220831-108bfde5.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSM, 'r50-nl-gaussian' config variant (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  - name: 'tsm_r50_gauss'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 28
    flops: 59.06
    accuracy: 73.66
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsm/tsm_imagenet-pretrained-r50-nl-gaussian_8xb16-1x1x8-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsm/tsm_imagenet-pretrained-r50-nl-gaussian_8xb16-1x1x8-50e_kinetics400-rgb_20220831-7e54dacf.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSM, 'r50-nl-embedded-gaussian' config variant (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  - name: 'tsm_r50_embgauss'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 31.68
    flops: 61.3
    accuracy: 74.34
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 224
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsm/tsm_imagenet-pretrained-r50-nl-embedded-gaussian_8xb16-1x1x8-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsm/tsm_imagenet-pretrained-r50-nl-embedded-gaussian_8xb16-1x1x8-50e_kinetics400-rgb_20220831-35eddb57.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_maxpool'
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # TSM, 'mobileone-s4' '1x1x16' config (ImageNet-pretrained, Kinetics-400 RGB per config filename).
  # NOTE(review): the entry name contains 'r50' while the config filename names a
  # mobileone-s4 backbone - confirm the naming is intentional.
  - name: 'tsm_r50_mobones4_16'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 13.72
    flops: 48.65
    accuracy: 74.38
    time_as: 'input_dim_local'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # This preprocessor additionally sets an 'NCHW' format_shape.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 1
      frame_interval: 1
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
      format_shape: 'NCHW'
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/tsm/tsm_imagenet-pretrained-mobileone-s4_8xb16-1x1x16-50e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/tsm/tsm_imagenet-pretrained-mobileone-s4_8xb16-1x1x16-50e_kinetics400-rgb_20230825-a7f8876b.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'cls_head_avg_pool'
      - 'cls_head_consensus'
      - 'cls_head_dropout'

  # X3D-S, '13x6x1' config (Kinetics-400 RGB per config filename).
  - name: 'x3d_s'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 3.76
    flops: 1.96
    accuracy: 73.2
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # clip_len / frame_interval mirror the '13x6' part of the config filename.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 13
      frame_interval: 6
      resize_size: 182
      crop_type: 'center_crop'
      crop_size: 182
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/x3d/x3d_s_13x6x1_facebook-kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/x3d/x3d_s_13x6x1_facebook-kinetics400-rgb_20201027-623825a0.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_conv1_t_activate'
      - 'backbone_conv1_t_bn'
      - 'cls_head_dropout'
      - 'cls_head_pool'
      - 'cls_head_relu'
      - 'cls_head_fc1'

  # X3D-M, '16x5x1' config (Kinetics-400 RGB per config filename).
  - name: 'x3d_m'
    type: 'video-k400'
    archtype: 'CNNs'
    params: 3.76
    flops: 4.73
    accuracy: 75.2
    time_as: 'input_dim_local'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    # clip_len / frame_interval mirror the '16x5' part of the config filename.
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 5
      resize_size: 256
      crop_type: 'center_crop'
      crop_size: 256
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/x3d/x3d_m_16x5x1_facebook-kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/x3d/x3d_m_16x5x1_facebook-kinetics400-rgb_20201027-3f42382a.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_conv1_t_activate'
      - 'backbone_conv1_t_bn'
      - 'cls_head_dropout'
      - 'cls_head_pool'
      - 'cls_head_relu'
      - 'cls_head_fc1'

  # MViT small ('mvit-small-p244', 16x4x1), Kinetics-400 RGB per config filename.
  - name: 'MViT_v2_S'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 34.5
    flops: 64
    accuracy: 81.1
    time_as: 'input_dim_global'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # NOTE(review): config filename carries a '32xb16-...-200e' schedule tag that the
    # checkpoint filename does not - presumably intentional, confirm the pairing.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/mvit/mvit-small-p244_32xb16-16x4x1-200e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/mvit/mvit-small-p244_16x4x1_kinetics400-rgb_20221021-9ebaaeed.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_norm3'
      - 'cls_head_dropout'

  # MViT base ('mvit-base-p244', 32x3x1), Kinetics-400 RGB per config filename.
  - name: 'MViT_v2_B'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 51.2
    flops: 225
    accuracy: 82.6
    time_as: 'input_dim_global'
    pre: 'nopre'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 3
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/mvit/mvit-base-p244_32x3x1_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/mvit/mvit-base-p244_32x3x1_kinetics400-rgb_20221021-f392cd2d.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_norm3'
      - 'cls_head_dropout'

  # Video Swin tiny ('in1k-pre', 32x2x1), Kinetics-400 RGB per config filename.
  - name: 'video_swin_tiny'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 28.2
    flops: 88
    accuracy: 78.9
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/swin/swin-tiny-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/swin/swin-tiny-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb_20220930-241016b2.pth
    # Explicit dotted module paths; stage 2 is listed at every odd block index 1..17.
    layers:
      - 'backbone.layers.0'
      - 'backbone.layers.1'
      - 'backbone.layers.2.blocks.1'
      - 'backbone.layers.2.blocks.3'
      - 'backbone.layers.2.blocks.5'
      - 'backbone.layers.2.blocks.7'
      - 'backbone.layers.2.blocks.9'
      - 'backbone.layers.2.blocks.11'
      - 'backbone.layers.2.blocks.13'
      - 'backbone.layers.2.blocks.15'
      - 'backbone.layers.2.blocks.17'
      - 'backbone.layers.3'
      - 'cls_head.fc_cls'
    rsa_skips: []

  # Video Swin small ('in1k-pre', 32x2x1), Kinetics-400 RGB per config filename.
  - name: 'video_swin_small'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 49.8
    flops: 166
    accuracy: 80.54
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/swin/swin-small-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/swin/swin-small-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb_20220930-e91ab986.pth
    # Explicit dotted module paths; stage 2 is listed at every odd block index 1..17.
    layers:
      - 'backbone.layers.0'
      - 'backbone.layers.1'
      - 'backbone.layers.2.blocks.1'
      - 'backbone.layers.2.blocks.3'
      - 'backbone.layers.2.blocks.5'
      - 'backbone.layers.2.blocks.7'
      - 'backbone.layers.2.blocks.9'
      - 'backbone.layers.2.blocks.11'
      - 'backbone.layers.2.blocks.13'
      - 'backbone.layers.2.blocks.15'
      - 'backbone.layers.2.blocks.17'
      - 'backbone.layers.3'
      - 'cls_head.fc_cls'
    rsa_skips: []

  # Video Swin base ('in1k-pre', 32x2x1), Kinetics-400 RGB per config filename.
  - name: 'video_swin_base'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 88
    flops: 282
    accuracy: 80.57
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 32
      frame_interval: 2
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/swin/swin-base-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/swin/swin-base-p244-w877_in1k-pre_8xb8-amp-32x2x1-30e_kinetics400-rgb_20220930-182ec6cc.pth
    # Explicit dotted module paths; stage 2 is listed at every odd block index 1..17.
    layers:
      - 'backbone.layers.0'
      - 'backbone.layers.1'
      - 'backbone.layers.2.blocks.1'
      - 'backbone.layers.2.blocks.3'
      - 'backbone.layers.2.blocks.5'
      - 'backbone.layers.2.blocks.7'
      - 'backbone.layers.2.blocks.9'
      - 'backbone.layers.2.blocks.11'
      - 'backbone.layers.2.blocks.13'
      - 'backbone.layers.2.blocks.15'
      - 'backbone.layers.2.blocks.17'
      - 'backbone.layers.3'
      - 'cls_head.fc_cls'
    rsa_skips: []

  # TimeSformer, 'divST' config variant (8x32x1, Kinetics-400 RGB per config filename).
  - name: 'timesformer_divST'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 122
    flops: 196
    accuracy: 77.69
    time_as: 'input_dim_factorized'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 32
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_drop_after_pos'
      - 'backbone_drop_after_time'
      - 'backbone_norm'

  # TimeSformer, 'jointST' config variant (8x32x1, Kinetics-400 RGB per config filename).
  - name: 'timesformer_jointST'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 86.11
    flops: 180
    accuracy: 76.95
    time_as: 'input_dim_global'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 32
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth
    # rsa_skips entries are written with '_' only, never '.'.
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_drop_after_pos'
      - 'backbone_drop_after_time'
      - 'backbone_norm'

  # UniFormer small ('imagenet1k-pre', 16x4x1), Kinetics-400 RGB per config filename.
  - name: 'uniformer_S'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 21.4
    flops: 41.8
    accuracy: 80.8
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/uniformer/uniformer-small_imagenet1k-pre_16x4x1_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/uniformer/uniformer-small_imagenet1k-pre_16x4x1_kinetics400-rgb_20221219-c630a037.pth
    # Explicit dotted module paths: one block index per blocksN group, then the head.
    layers:
      - 'backbone.blocks1.2'
      - 'backbone.blocks2.3'
      - 'backbone.blocks3.7'
      - 'backbone.blocks4.2'
      - 'cls_head'
    rsa_skips: []

  # UniFormer base ('imagenet1k-pre', 16x4x1), Kinetics-400 RGB per config filename.
  - name: 'uniformer_B'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 49.8
    flops: 96.7
    accuracy: 82.0
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave None
    netset_fallback: 'Pyvideo'
    # _partial_: true -> Hydra instantiate yields a functools.partial of _target_.
    extractor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    # mmaction2 config file and matching checkpoint, both under ${location}.
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/uniformer/uniformer-base_imagenet1k-pre_16x4x1_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/uniformer/uniformer-base_imagenet1k-pre_16x4x1_kinetics400-rgb_20221219-157c2e66.pth
    # Explicit dotted module paths: one block index per blocksN group, then the head.
    layers:
      - 'backbone.blocks1.4'
      - 'backbone.blocks2.7'
      - 'backbone.blocks3.19'
      - 'backbone.blocks4.6'
      - 'cls_head'
    rsa_skips: []

  # 'uniformer_v2_B_16_CLIP_k400' entry (type 'video-k400'); cfg targets
  # mmaction_loader with the mmaction2 config and checkpoint paths listed below.
  - name: 'uniformer_v2_B_16_CLIP_k400'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 115
    flops: 100
    accuracy: 84.3
    time_as: 'input_dim_localglobal'
    pre: 'preimage'
    set: null  # leave as None
    netset_fallback: 'Pyvideo'
    extractor:
      # _partial_ makes Hydra return a partial of the target instead of calling it
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 12
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/uniformerv2/uniformerv2-base-p16-res224_clip_8xb32-u8_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/uniformerv2/uniformerv2-base-p16-res224_clip_8xb32-u8_kinetics400-rgb_20230313-e29fc968.pth
    # NOTE(review): the 'uniformer_v2_B_16_CLIP_k710_k400' entry additionally
    # skips backbone_transformer_dec_*/dpe_*/norm -- confirm whether this
    # shorter list is intentional.
    rsa_skips:
      - 'backbone_ln_pre'

  # 'uniformer_v2_B_16_CLIP_k710_k400' entry (type 'video-k400'); cfg targets
  # mmaction_loader with the mmaction2 config and checkpoint paths listed below.
  - name: 'uniformer_v2_B_16_CLIP_k710_k400'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 115
    flops: 100
    accuracy: 85.6
    time_as: 'input_dim_localglobal'
    pre: 'prevideo'
    set: null  # leave as None
    netset_fallback: 'Pyvideo'
    extractor:
      # _partial_ makes Hydra return a partial of the target instead of calling it
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 8
      frame_interval: 12
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/uniformerv2/uniformerv2-base-p16-res224_clip-kinetics710-pre_8xb32-u8_kinetics400-rgb.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/uniformerv2/uniformerv2-base-p16-res224_clip-kinetics710-pre_8xb32-u8_kinetics400-rgb_20230313-75be0806.pth
    # names use '_' separators, never '.'
    rsa_skips:
      - 'backbone_transformer_dec_0'
      - 'backbone_transformer_dec_1'
      - 'backbone_transformer_dec_2'
      - 'backbone_transformer_dec_3'
      - 'backbone_transformer_dpe_0'
      - 'backbone_transformer_dpe_1'
      - 'backbone_transformer_dpe_2'
      - 'backbone_transformer_dpe_3'
      - 'backbone_transformer_norm'
      - 'backbone_ln_pre'

  # 'videomae_B' entry (type 'video-k400'); cfg targets mmaction_loader with
  # the mmaction2 config and checkpoint paths listed below.
  - name: 'videomae_B'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 87
    flops: 180
    accuracy: 81.3
    time_as: 'input_dim_and_supervision_signal'
    pre: 'prevideo'
    set: null  # leave as None
    netset_fallback: 'Pyvideo'
    extractor:
      # _partial_ makes Hydra return a partial of the target instead of calling it
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/videomae/vit-base-p16_videomae-k400-pre_16x4x1_kinetics-400.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/videomae/vit-base-p16_videomae-k400-pre_16x4x1_kinetics-400_20221013-860a3cd3.pth
    # names use '_' separators, never '.'
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_norm'
      - 'backbone_fc_norm'
      - 'backbone_pos_drop'

  # 'videomae_v2_S' entry (type 'video-k400'); cfg targets mmaction_loader with
  # the mmaction2 config and checkpoint paths listed below.
  - name: 'videomae_v2_S'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 22
    flops: 57
    accuracy: 83.6
    time_as: 'input_dim_and_supervision_signal'
    pre: 'prevideo'
    set: null  # leave as None
    netset_fallback: 'Pyvideo'
    extractor:
      # _partial_ makes Hydra return a partial of the target instead of calling it
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/videomaev2/vit-small-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/videomaev2/vit-small-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-25c748fd.pth
    # names use '_' separators, never '.'
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_norm'
      - 'backbone_fc_norm'
      - 'backbone_pos_drop'

  # 'videomae_v2_B' entry (type 'video-k400'); cfg targets mmaction_loader with
  # the mmaction2 config and checkpoint paths listed below.
  - name: 'videomae_v2_B'
    type: 'video-k400'
    archtype: 'Transformers'
    params: 87
    flops: 180
    accuracy: 86.6
    time_as: 'input_dim_and_supervision_signal'
    pre: 'prevideo'
    set: null  # leave as None
    netset_fallback: 'Pyvideo'
    extractor:
      # _partial_ makes Hydra return a partial of the target instead of calling it
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.extract_mmaction
      stage: 'head'  # 'backbone', 'neck', or 'head'
    preprocessor:
      _partial_: true
      _target_: repralign.models.custom_extraction_functions.preprocess_mmaction
      clip_len: 16
      frame_interval: 4
      resize_size: 224
      crop_type: 'center_crop'
      crop_size: 224
    cfg:
      _target_: repralign.models.loading.mmaction_loader
      path_to_config: ${location}/workspace/code/mmaction2/configs/recognition/videomaev2/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400.py
      path_to_checkpoint: ${location}/workspace/code/mmaction2/checkpoints/videomaev2/vit-base-p16_videomaev2-vit-g-dist-k710-pre_16x4x1_kinetics-400_20230510-3e7f93b2.pth
    # names use '_' separators, never '.'
    rsa_skips:
      - 'backbone_patch_embed_projection'
      - 'backbone_norm'
      - 'backbone_fc_norm'
      - 'backbone_pos_drop'
