#!/usr/bin/env bash
#
# Fine-tune a pretrained AV-HuBERT checkpoint with fairseq-hydra-train, then
# decode the "test" subset with infer_s2s.py using the best checkpoint of
# that fine-tuning run. The fine-tune + decode pair is repeated once per run
# index; the run index is also used as the random seed.
#
# Requires bash (arithmetic for-loop). All paths below are machine-specific
# and must be adapted before running elsewhere.

# Stop at the first failing command or unset variable, so decoding never
# runs against a missing or stale checkpoint after a failed fine-tuning.
set -euo pipefail

# ---- Experiment settings ----------------------------------------------------
model_size="base"
data_size="433"
modal="multi"
root_folder_name="av_hubert"
num_runs=1   # number of fine-tune + decode repetitions (seeds 1..num_runs)

# ---- Fine-tuning inputs -----------------------------------------------------
data_path="/home/disk/data/${data_size}h_data"
finetune_config_dir="/home/xxx/${root_folder_name}/avhubert/conf/custom"
# The 30h and 433h setups share the same training config, so the config name
# does not depend on data_size.
finetune_config_name="${model_size}_${modal}.yaml"
user_dir="/home/xxx/${root_folder_name}/avhubert"
bpe_dir="/home/disk/video_output/spm1000/spm_unigram1000.model"
pretrained_model_ckpt_path="/home/xxx/${root_folder_name}/trained_model/${model_size}_lrs3_iter5.pt"

# ---- Decoding inputs (identical for every run, so set once) -----------------
script_path="/home/xxx/${root_folder_name}/avhubert"
decode_config_dir="/home/xxx/${root_folder_name}/avhubert/conf"
decode_config_name="s2s_decode.yaml"
lab_path="/home/disk/video_output/lab"   # not referenced by any command below
# NOTE(review): every run writes to this same directory; with num_runs > 1
# later runs may overwrite earlier results -- confirm before raising num_runs.
result_path="/home/disk/avhubert_experiment/decode/s2s/test"

for ((i = 1; i <= num_runs; i++)); do
  # Per-run output directory; checkpoints are read back from
  # "${finetune_dir}/checkpoints" by the decode step.
  finetune_dir="/home/disk/avhubert_experiment/fine-tune/${modal}_${model_size}_${data_size}_${i}"

  # ---- Fine-tune (pinned to GPU 1) ------------------------------------------
  CUDA_VISIBLE_DEVICES=1 fairseq-hydra-train \
    --config-dir "${finetune_config_dir}" \
    --config-name "${finetune_config_name}" \
    "task.data=${data_path}" \
    "task.label_dir=${data_path}" \
    "+model.w2v_path=${pretrained_model_ckpt_path}" \
    "+hydra.run.dir=${finetune_dir}" \
    "+model.label_rate=100" \
    "+common.user_dir=${user_dir}" \
    "+common.seed=${i}" \
    "+task.tokenizer_bpe_model=${bpe_dir}" \
    || {
      printf 'fine-tuning failed for run %s; skipping decode\n' "${i}" >&2
      exit 1
    }

  # ---- Decode ---------------------------------------------------------------
  decode_ckpt_path="${finetune_dir}/checkpoints/checkpoint_best.pt"

  # The modalities override is quoted as a whole: unquoted, the shell treats
  # [...] as a glob pattern and strips the inner quotes. This passes the same
  # string the unquoted form produced ([audio,video]) without glob risk.
  python -B "${script_path}/infer_s2s.py" \
    --config-dir "${decode_config_dir}" \
    --config-name "${decode_config_name}" \
    "dataset.gen_subset=test" \
    "common_eval.path=${decode_ckpt_path}" \
    "common_eval.results_path=${result_path}" \
    "override.modalities=[audio,video]" \
    "common.user_dir=${user_dir}" \
    "+override.data=${data_path}" \
    "+override.label_dir=${data_path}"
done



