data:
  # Training split: a ConcatDataset wrapping one sim150 video dataset entry.
  train:
    _target_: omnivision.data.concat_dataset.ConcatDataset
    max_steps: sum
    # One factor per entry in `datasets` below (a single entry here).
    repeat_factors:
      - 1.0
    datasets:
      - _target_: omnivision.data.torch_dataset.TorchDataset
        dataset:
          _target_: omnivision.data.path_dataset.VideoPathDataset
          # Video paths, label files and the path prefix are all supplied
          # through interpolations defined outside this section.
          path_file_list:
            - ${sim150_train_vids_path}
          label_file_list:
            - ${sim150_train_labels_path}
          new_prefix: ${sim150_prefix}
          # Randomly placed clip; clip_duration is presumably in seconds
          # (pytorchvideo convention) -- TODO confirm.
          clip_sampler:
            _target_: pytorchvideo.data.clip_sampling.RandomClipSampler
            clip_duration: 2
          # Subsample each clip down to ${num_frames} frames.
          frame_sampler:
            _target_: pytorchvideo.transforms.UniformTemporalSubsample
            num_samples: ${num_frames}
          decoder: pyav
          normalize_to_0_1: true
          transforms:
            - _target_: omnivision.data.transforms.transform_wrappers.VisionTransform
              # Augmentation chain, applied in the listed order:
              # scale -> random resized crop -> random flip -> normalize.
              base_transform:
                _target_: torchvision.transforms.Compose
                transforms:
                  - _target_: pytorchvideo.transforms.ShortSideScale
                    size: 256
                  - _target_: torchvision.transforms.RandomResizedCrop
                    size: 224
                  - _target_: torchvision.transforms.RandomHorizontalFlip
                    p: 0.5
                  - _target_: torchvision.transforms._transforms_video.NormalizeVideo
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
        # Loader settings for the training split.
        shuffle: true
        batch_size: ${batch_size}
        num_workers: ${num_workers}
        pin_memory: true
        drop_last: true
        collate_fn:
          _target_: omnivision.data.api.DefaultOmnivoreCollator
          # Same key that is used under `metrics` and `loss` in this config.
          output_key: sim150
          batch_kwargs:
            model_fwd_kwargs:
              use_checkpoint: true
            accum_steps: 1
          batch_transforms:
            - _target_: omnivision.data.transforms.cutmixup.CutMixUp
              # Mixup alpha value; mixup is active if > 0.
              mixup_alpha: 0.8
              # Cutmix alpha value; cutmix is active if > 0.
              cutmix_alpha: 1.0
              # Probability of applying mixup or cutmix per batch or element.
              prob: 1.0
              # Probability of switching to cutmix instead of mixup when
              # both are active.
              switch_prob: 0.5
              # How to apply mixup/cutmix params: per 'batch', 'pair'
              # (pair of elements), or 'elem' (element).
              mode: batch
              # Apply lambda correction when the cutmix bbox is clipped by
              # image borders.
              correct_lam: true
              # Label smoothing applied to the mixed target tensor.
              label_smoothing: 0.1
              # Number of target classes.
              num_classes: 150
        worker_init_fn: null
  # Validation split: a ConcatDataset wrapping one sim150 video dataset entry.
  val:
    _target_: omnivision.data.concat_dataset.ConcatDataset
    max_steps: sum
    datasets:
      - _target_: omnivision.data.torch_dataset.TorchDataset
        dataset:
          _target_: omnivision.data.path_dataset.VideoPathDataset
          # Video paths, label files and the path prefix are all supplied
          # through interpolations defined outside this section.
          path_file_list:
            - ${sim150_val_vids_path}
          label_file_list:
            - ${sim150_val_labels_path}
          new_prefix: ${sim150_prefix}
          # A single fixed clip per video; clip_duration is presumably in
          # seconds (pytorchvideo convention) -- TODO confirm.
          clip_sampler:
            _target_: pytorchvideo.data.clip_sampling.ConstantClipsPerVideoSampler
            clip_duration: 10
            clips_per_video: 1
          # 160 frames are kept here; TemporalCrop below then works on
          # them with frames_per_clip=${num_frames} and stride=40.
          frame_sampler:
            _target_: pytorchvideo.transforms.UniformTemporalSubsample
            num_samples: 160
          decoder: pyav
          normalize_to_0_1: true
          transforms:
            - _target_: omnivision.data.transforms.transform_wrappers.VisionTransform
              # No random augmentation in this chain; applied in order:
              # scale -> normalize -> temporal crop -> spatial crop.
              base_transform:
                _target_: torchvision.transforms.Compose
                transforms:
                  - _target_: pytorchvideo.transforms.ShortSideScale
                    size: 224
                  - _target_: torchvision.transforms._transforms_video.NormalizeVideo
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
                  - _target_: omnivision.data.transforms.pytorchvideo.TemporalCrop
                    frames_per_clip: ${num_frames}
                    stride: 40
                  - _target_: omnivision.data.transforms.pytorchvideo.SpatialCrop
                    crop_size: 224
                    num_crops: 3
        # Loader settings for the validation split.
        shuffle: false
        batch_size: ${batch_size}
        num_workers: ${num_workers}
        pin_memory: true
        # NOTE(review): drop_last is enabled for validation too; if this is
        # forwarded to the data loader, a trailing partial batch would be
        # excluded from the metrics -- confirm this is intended.
        drop_last: true
        collate_fn:
          _target_: omnivision.data.api.DefaultOmnivoreCollator
          # Same key that is used under `metrics` and `loss` in this config.
          output_key: sim150
        worker_init_fn: null
# Accuracy meters per phase (train / val). The `sim150` key matches the
# collators' `output_key` in the data section; presumably that is how
# meters are paired with batches -- verify against the trainer.
metrics:
  train:
    sim150:
      # Top-1 accuracy.
      accuracy_top1:
        _target_: omnivision.metrics.avg_pooled_accuracy_list_meter.AvgPooledAccuracyListMeter
        top_k: 1
      # Top-5 accuracy.
      accuracy_top5:
        _target_: omnivision.metrics.avg_pooled_accuracy_list_meter.AvgPooledAccuracyListMeter
        top_k: 5
  # Validation uses the same two meters as training.
  val:
    sim150:
      # Top-1 accuracy.
      accuracy_top1:
        _target_: omnivision.metrics.avg_pooled_accuracy_list_meter.AvgPooledAccuracyListMeter
        top_k: 1
      # Top-5 accuracy.
      accuracy_top5:
        _target_: omnivision.metrics.avg_pooled_accuracy_list_meter.AvgPooledAccuracyListMeter
        top_k: 5
# Loss for the `sim150` output key: plain cross-entropy with no arguments.
# NOTE(review): the train collator applies CutMixUp with label smoothing,
# so targets are presumably soft/mixed; confirm the installed torch
# version's CrossEntropyLoss accepts class-probability targets.
loss:
  sim150:
    _target_: torch.nn.CrossEntropyLoss