# @package _global_
# Top-level `config` mapping; the settings visible in this file are nested under it.
config:
  # Free-form experiment label and an optional note.
  EXPR_NAME: devel
  NOTE: ''
  # NOTE(review): -1 looks like a "no fixed seed" sentinel — confirm against the consumer.
  SEED: -1
  DATASET: movienet
  # NOTE(review): YAML reads a bare `None` as the string "None", not as null —
  # confirm that the loading code expects (or overrides) this value.
  DATA_PATH: None
  NUM_MOVIES: 1100  # pretraining only; listed options: [1100, 782, 972]
  USE_RAW_SHOT: false
  # Model settings: the shot encoder and the contextual relation network.
  MODEL:
    use_sync_bn: true
    shot_encoder:
      name: resnet  # one of [resnet, vit]
      pretrained: true
      # Options for the `resnet` encoder.
      resnet:
        input_shape: [3, 224, 224]
        depth: 50
        weights: IMAGENET1K_V1  # one of [IMAGENET1K_V1, IMAGENET1K_V2]
        params:
          zero_init_residual: true
      # Options for the `vit` encoder.
      vit:
        img_size: 224
        weights: vit_small_patch32_224
    contextual_relation_network:
      enabled: true
      name: trn  # NOTE(review): presumably selects the matching entry under `params` — confirm
      attention_mask_type: default
      params:
        # NOTE(review): key names resemble a BERT-style transformer config — verify against the consumer.
        trn:
          # NOTE(review): 384 matches a ViT-small embedding width rather than the
          # 2048-d feature usually produced by ResNet-50 — confirm this agrees with
          # `shot_encoder.name` or is overridden at runtime.
          input_dim: 384
          is_decoder: false
          add_cross_attention: false
          chunk_size_feed_forward: 0
          attention_probs_dropout_prob: 0.1
          hidden_act: gelu
          hidden_dropout_prob: 0.1
          hidden_size: 768
          intermediate_size: 3072
          # Written with an explicit mantissa dot so that YAML 1.1 loaders
          # (e.g. PyYAML) resolve it as a float instead of the string "1e-12".
          layer_norm_eps: 1.0e-12
          num_attention_heads: 8
          num_hidden_layers: 2
          pooling_method: center
          _attn_implementation: eager  # one of [eager, sdpa]
  # Training-time settings: keyframes, batching, augmentation, data loading and optimisation.
  TRAIN:
    NUM_KEYFRAME: 3  # NOTE(review): presumably keyframes used per shot — confirm
    BATCH_SIZE:
      effective_batch_size: 8
    # Augmentation steps, in the order they are listed.
    TRANSFORM:
      - name: VideoRandomResizedCrop
        size: 224
        bottom_area: 0.14  # other value noted by the author: 0.5
      - name: VideoRandomHFlip
      - name: VideoRandomColorJitter
        brightness: 0.2
        contrast: 0.2
        saturation: 0.2
        hue: 0.05
        p: 0.8
        consistent: false
      - name: VideoRandomGaussianBlur
        radius_min: 0.1
        radius_max: 2.0
        p: 0.5
      - name: VideoToTensor
        # Presumably per-channel normalisation statistics.
        # Alternative set recorded by the author:
        #   mean [0.48145466, 0.4578275, 0.40821073]
        #   std  [0.26862954, 0.26130258, 0.27577711]
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    NUM_WORKERS: 16
    PIN_MEMORY: false
    OPTIMIZER:
      name: adam
      weight_decay: 0
      regularize_bn: false
      regularize_bias: false
      # NOTE(review): with `auto_scale` on, `base_lr` is presumably rescaled
      # relative to `base_lr_batch_size` — confirm in the training code.
      lr:
        base_lr: 0.0000025
        base_lr_batch_size: 256
        auto_scale: true
      scheduler:
        name: cosine_with_linear_warmup
        warmup: 0.0  # expressed as a ratio
  # Evaluation-time settings: validation frequency, batching and preprocessing.
  TEST:
    # NOTE(review): unit (steps vs. epochs) is not visible in this file — confirm.
    VAL_FREQ: 100
    BATCH_SIZE:
      effective_batch_size: 1024
    # Preprocessing steps, in the order they are listed; no random
    # augmentations appear in this list.
    TRANSFORM:
      - name: VideoResizedCenterCrop
        image_size: 256
        crop_size: 224
      - name: VideoToTensor
        # Same mean/std values as the TRAIN.TRANSFORM VideoToTensor entry.
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
  # Loss settings, including the sampling method and its parameters.
  LOSS:
    sampling_method:
      name: sbd  # NOTE(review): presumably selects one of the entries under `params` — confirm
      params:
        sbd:
          neighbor_size: 8
        asymmetric:
          neighbor_left: 8
          neighbor_right: 1
          neighbor_interval: 1
    type: weighted_cross_entropy
    # Optional behaviours, all switched off in this configuration.
    activate_nearby_shots: false
    first_shot_prediction: false
    reverse_shot_prediction: false
  # Trainer options.
  # NOTE(review): key names resemble PyTorch Lightning `Trainer` arguments — confirm.
  TRAINER:
    accelerator: gpu
    precision: 16
    max_epochs: 1
    detect_anomaly: false
    num_sanity_val_steps: 0
  # Distributed-run topology: one node with one process per node.
  DISTRIBUTED:
    NUM_NODES: 1
    NUM_PROC_PER_NODE: 1
  # Settings for additional input modalities; `TYPE` is an empty list here.
  OTHER_MODALITY:
    TYPE: []
    # NOTE(review): YAML reads a bare `None` as the string "None", not as null —
    # confirm that the loading code expects this for both paths below.
    PLACE_PATH: None
    AUDIO_PATH: None