#!/usr/bin/env bash
# Launcher for bm_detr/train.py.
#
# Usage: bash <this script> <dset_name> <v_feat_types> [extra train.py args...]
#   dset_name     selects an arm of the dataset `case` below, one of:
#                 charades-CD, charades-CG, charades, activitynet,
#                 activitynet-CG, activitynet-CD, hl, tacos
#   v_feat_types  substring-matched against feature names such as
#                 slowfast / clip / c3d / i3d / vgg; which of them are
#                 honoured depends on the dataset arm
#   extra args    everything from the 3rd argument on is forwarded to train.py
#
# The interpreter line is required because the script relies on bash-only
# syntax ([[ ]], arrays, (( )), ${@:3}).

# Positional arguments.
dset_name="$1"
v_feat_types="$2"

# Text feature type; the dataset arms only know "clip" (and, for some, "glove").
t_feat_type=clip

# Output location and experiment identity handed to train.py.
results_root="/data/mjjung/iclr/results_${dset_name}/${v_feat_types}"
ctx_mode=video_tef
exp_id=exp

# Defaults; individual dataset arms may override max_v_l, max_q_l,
# feat_root and stop_metric.
bsz=32
max_v_l=75
max_q_l=32
feat_root=/data/mjjung/features_detr
stop_metric=MR-full-R1@0.7

######## data paths

# Per-dataset configuration.
#
# Every arm sets: train_path, eval_path, eval_split_name, meta_by_qid_path,
# n_epoch, clip_length, lr, lr_drop, lw_saliency, v_feat_dirs (array),
# v_feat_dim, t_feat_dir and t_feat_dim. Some arms also override max_v_l,
# max_q_l, stop_metric or feat_root.
#
# Video features are chosen by substring match on ${v_feat_types}:
#   * "+=" branches accumulate directories and add to v_feat_dim;
#   * "="  branches discard anything selected earlier in the arm and set
#     v_feat_dim outright.
# Inside (( )) variables are referenced by bare name, no ${} needed.
#
# Text features: "clip" points t_feat_dir at the dataset's CLIP text features;
# "glove" (where supported) sets t_feat_dir to the literal None so that the
# launcher's --t_feat_dir option always receives a value.
# Unsupported values abort with exit status 1 and a message on stderr.
case "${dset_name}" in
  charades-CD)
    echo "charades-CD"

    ######## data paths
    train_path="data/${dset_name}/train.jsonl"
    eval_path="data/${dset_name}/val.jsonl"
    eval_split_name=val
    meta_by_qid_path="data/${dset_name}/meta_by_qid.json"

    # options
    n_epoch=100
    clip_length=1
    lr=0.0002
    lr_drop=40
    lw_saliency=4

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/clip_features")
      (( v_feat_dim += 512 ))
    fi
    if [[ ${v_feat_types} == *"i3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/i3d_finetuned2")
      v_feat_dim=1024
    fi

    # text features
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/charades-CD_features/clip_text_features/"
      t_feat_dim=512
    elif [[ ${t_feat_type} == "glove" ]]; then
      t_feat_dir=None
      t_feat_dim=300
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  charades-CG)
    echo "charades-CG"

    ######## data paths
    train_path=data/charades-CG/train.jsonl
    eval_path=data/charades-CG/test_trivial.jsonl
    eval_split_name=test_trivial
    meta_by_qid_path=data/charades-CG/meta_by_qid.json

    # options
    n_epoch=100
    clip_length=1
    lr=0.0002
    lr_drop=40
    lw_saliency=4

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/clip_features")
      (( v_feat_dim += 512 ))
    fi
    if [[ ${v_feat_types} == *"c3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/c3d_features2")
      v_feat_dim=1024
    fi
    if [[ ${v_feat_types} == *"i3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/i3d_features")
      v_feat_dim=1024
    fi

    # text features
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/charades-CG_features/clip_text_features/"
      t_feat_dim=512
    elif [[ ${t_feat_type} == "glove" ]]; then
      # Same placeholder as the charades-CD arm, so t_feat_dir is never unset.
      t_feat_dir=None
      t_feat_dim=300
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  charades)
    echo "charades-STA"

    ######## data paths
    train_path=data/charades/train.jsonl
    eval_path=data/charades/test.jsonl
    eval_split_name=test
    meta_by_qid_path=data/charades/meta_by_qid.json

    # options
    n_epoch=100
    clip_length=1
    lr=0.0002
    lr_drop=40
    lw_saliency=4

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/charades_features/clip_features")
      (( v_feat_dim += 512 ))
    fi
    if [[ ${v_feat_types} == *"c3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/c3d_features2")
      v_feat_dim=1024
    fi
    if [[ ${v_feat_types} == *"i3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/i3d_finetuned3")
      v_feat_dim=1024
    fi
    if [[ ${v_feat_types} == *"vgg"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/charades_features/vgg_features2")
      v_feat_dim=4096
    fi

    # text features (only clip is supported for this dataset)
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/charades_features/clip_text_features/"
      t_feat_dim=512
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  activitynet)
    echo "activitynet"

    ######## data paths
    # data/activitynet/train.jsonl used to be assigned here and was
    # immediately overwritten; only the effective value is kept.
    train_path=data/activitynet/anet_sbert.jsonl
    eval_path=data/activitynet/val_2.jsonl
    eval_split_name=val
    meta_by_qid_path=data/activitynet/meta_by_qid.json

    # options
    clip_length=2
    max_v_l=150
    max_q_l=75
    n_epoch=100
    lr=0.0001
    lr_drop=80
    lw_saliency=1

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()

    # slowfast / clip video features are disabled for this dataset:
#    if [[ ${v_feat_types} == *"slowfast"* ]]; then
#      v_feat_dirs+=(${feat_root}/activitynet_features/slowfast_features)
#      (( v_feat_dim += 2304 ))
#    fi
#    if [[ ${v_feat_types} == *"clip"* ]]; then
#      v_feat_dirs+=(${feat_root}/activitynet_features/clip_features)
#      (( v_feat_dim += 512 ))
#    fi
    if [[ ${v_feat_types} == *"c3d"* ]]; then
      # "pca_c3d" also contains "c3d", hence the nested check.
      if [[ ${v_feat_types} == *"pca_c3d"* ]]; then
        v_feat_dirs+=("${feat_root}/activitynet_features/pca_c3d_features")
        v_feat_dim=500
      else
        v_feat_dirs+=("${feat_root}/activitynet_features/c3d_features")
        v_feat_dim=1024
      fi
    fi

    # text features
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/activitynet_features/clip_text_features/"
      t_feat_dim=512
    elif [[ ${t_feat_type} == "glove" ]]; then
      # Same placeholder as the charades-CD arm, so t_feat_dir is never unset.
      t_feat_dir=None
      t_feat_dim=300
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  activitynet-CG)
    echo "activitynet-CG"

    ######## data paths
    train_path=data/activitynet-CG/train.jsonl
    eval_path=data/activitynet-CG/test_trivial.jsonl
    eval_split_name=test_trivial
    meta_by_qid_path=data/activitynet-CG/meta_by_qid.json

    # options
    clip_length=2
    max_v_l=150
    max_q_l=75
    lr=0.0001
    n_epoch=100
    lr_drop=60
    lw_saliency=4

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/activitynet_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/activitynet_features/clip_features")
      (( v_feat_dim += 512 ))
    fi
    # NOTE(review): the c3d and i3d branches below append to v_feat_dirs but
    # overwrite v_feat_dim, unlike the other arms, which replace both. When
    # combined with slowfast/clip the directory list and the dimension may
    # disagree - confirm which behaviour is intended.
    if [[ ${v_feat_types} == *"c3d"* ]]; then
      # "pca_c3d" also contains "c3d", hence the nested check.
      if [[ ${v_feat_types} == *"pca_c3d"* ]]; then
        v_feat_dirs+=("${feat_root}/activitynet_features/pca_c3d_features")
        v_feat_dim=500
      else
        v_feat_dirs+=("${feat_root}/activitynet_features/c3d_features")
        v_feat_dim=1024
      fi
    fi
    if [[ ${v_feat_types} == *"i3d"* ]]; then
      v_feat_dirs+=("${feat_root}/activitynet_features/i3d_features2")
      v_feat_dim=1024
    fi

    # text features
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/activitynet-CG_features/clip_text_features/"
      t_feat_dim=512
    elif [[ ${t_feat_type} == "glove" ]]; then
      # Same placeholder as the charades-CD arm, so t_feat_dir is never unset.
      t_feat_dir=None
      t_feat_dim=300
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  activitynet-CD)
    echo "activitynet-CD"

    ######## data paths
    train_path=data/activitynet-CD/train.jsonl
    eval_path=data/activitynet-CD/val.jsonl
    eval_split_name=val

    # options
    clip_length=2
    max_v_l=150
    max_q_l=75
    n_epoch=50
    lr=0.0001
    lr_drop=60
    lw_saliency=4

    ######## setup video+text features
    # Overrides the global feat_root; the trailing slash yields "//" in the
    # paths built below.
    feat_root=/data/mjjung/features_detr/
    meta_by_qid_path=data/activitynet-CD/meta_by_qid.json

    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/activitynet_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/activitynet_features/clip_features")
      (( v_feat_dim += 512 ))
    fi
    if [[ ${v_feat_types} == *"i3d"* ]]; then
      # Replaces (does not extend) any selection made above.
      v_feat_dirs=("${feat_root}/activitynet_features/i3d_features2")
      v_feat_dim=1024
    fi

    # text features
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/activitynet-CD_features/clip_text_features/"
      t_feat_dim=512
    elif [[ ${t_feat_type} == "glove" ]]; then
      # Same placeholder as the charades-CD arm, so t_feat_dir is never unset.
      t_feat_dir=None
      t_feat_dim=300
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  hl)
    echo "QVHighlights"

    ######## data paths
    train_path=data/QVHighlights/highlight_train.jsonl
    eval_path=data/QVHighlights/highlight_val_release.jsonl
    meta_by_qid_path=data/hl/meta_by_qid.json
    eval_split_name=val

    # options
    clip_length=2
    n_epoch=200
    max_v_l=75
    max_q_l=32
    lr=0.0001
    lr_drop=400
    lw_saliency=1
    stop_metric=MR-full-mAP

    ######## setup video+text features
    # video features
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/hl_features/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/hl_features/clip_features")
      (( v_feat_dim += 512 ))
    fi

    # text features (only clip is supported for this dataset)
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/hl_features/clip_text_features/"
      t_feat_dim=512
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  tacos)
    echo "tacos"

    ######## data paths
    # data/tacos/train.jsonl used to be assigned here and was immediately
    # overwritten; only the effective value is kept.
    train_path=data/tacos/tacos_sbert.jsonl
    eval_path=data/tacos/test.jsonl
    # NOTE(review): every other dataset uses ".../meta_by_qid.json"; this file
    # name may be a typo - confirm against the data directory.
    meta_by_qid_path=data/tacos/meta_by_qid_path.json
    eval_split_name=test

    # options
    clip_length=2
    n_epoch=100
    max_v_l=75
    max_q_l=32
    lr=0.0002
    lr_drop=100
    lw_saliency=4
    stop_metric=MR-full-R1@0.5

    ######## setup video+text features
    # Overrides the global feat_root with the tacos-specific directory.
    feat_root=/data/mjjung/features_detr/tacos_features

    # video features (all three kinds accumulate here)
    v_feat_dim=0
    v_feat_dirs=()
    if [[ ${v_feat_types} == *"slowfast"* ]]; then
      v_feat_dirs+=("${feat_root}/slowfast_features")
      (( v_feat_dim += 2304 ))
    fi
    if [[ ${v_feat_types} == *"clip"* ]]; then
      v_feat_dirs+=("${feat_root}/clip_features")
      (( v_feat_dim += 512 ))
    fi
    if [[ ${v_feat_types} == *"c3d"* ]]; then
      v_feat_dirs+=("${feat_root}/c3d_features")
      (( v_feat_dim += 1024 ))
    fi

    # text features (only clip is supported for this dataset)
    if [[ ${t_feat_type} == "clip" ]]; then
      t_feat_dir="${feat_root}/clip_text_features/"
      t_feat_dim=512
    else
      echo "Wrong arg for t_feat_type." >&2
      exit 1
    fi
    ;;

  *)
    echo "Wrong dataset." >&2
    exit 1
    ;;
esac

# Fail fast when the dataset arm matched none of its known video feature
# names (e.g. the 2nd positional argument is missing or misspelled): in that
# case v_feat_dirs is empty and --v_feat_dirs would be passed without a value.
if (( ${#v_feat_dirs[@]} == 0 )); then
  echo "No video features selected for v_feat_types='${v_feat_types}'." >&2
  exit 1
fi

echo "feat types: " "${v_feat_types}" "${t_feat_type}"

# Arguments are collected in an array so each value stays a single word
# regardless of its content, and no line-continuation backslashes are needed.
train_args=(
  --dset_name "${dset_name}"
  --ctx_mode "${ctx_mode}"
  --train_path "${train_path}"
  --eval_path "${eval_path}"
  --meta_by_qid_path "${meta_by_qid_path}"
  --eval_split_name "${eval_split_name}"
  --v_feat_dirs "${v_feat_dirs[@]}"
  --v_feat_dim "${v_feat_dim}"
  --v_feat_type "${v_feat_types}"
  --n_epoch "${n_epoch}"
  --lr "${lr}"
  --lr_drop "${lr_drop}"
  --lw_saliency "${lw_saliency}"
  --t_feat_type "${t_feat_type}"
  # Falls back to the literal None (the placeholder the charades-CD glove
  # branch uses) so the option never loses its value when t_feat_dir is unset.
  --t_feat_dir "${t_feat_dir:-None}"
  --t_feat_dim "${t_feat_dim}"
  --clip_length "${clip_length}"
  --max_v_l "${max_v_l}"
  --max_q_l "${max_q_l}"
  --bsz "${bsz}"
  --stop_metric "${stop_metric}"
  --results_root "${results_root}"
  --exp_id "${exp_id}"
)

# Run from the repository root: "." is appended to PYTHONPATH for this one
# command. Everything after the first two positional arguments is forwarded
# unchanged; quoting "${@:3}" preserves arguments containing spaces or globs.
PYTHONPATH="${PYTHONPATH}:." python bm_detr/train.py "${train_args[@]}" "${@:3}"
