#!/bin/bash

# First comparison set: four checkpoint paths plus the array that groups them.
# NOTE: `models` is reassigned many times further down this script, so this
# grouping only matters if those later assignments are removed.
exp_root=/cpfs02/llm/shared/public/lantian/exp
sft_run=aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47

internlm2_s1="${exp_root}/20240418_aliyun_Ampere_7B_v1_1_FT_v1_0_0_s1_rc48_1660_hf_ckpt"
ultracm_path="${exp_root}/20240501_sft_7b_critique_ultracm_878/${sft_run}_critic_ultracm/878_hf_ckpt"
autoj_path="${exp_root}/20240501_sft_7b_critique_autoj_26/${sft_run}_critic_autoj/26_hf_ckpt"
d4_st_5_l1_resumm="${exp_root}/20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701/${sft_run}_FINAL_critic_v4_st_5_l1_resumm/385_hf_ckpt"

models=(
    "$internlm2_s1"
    "$ultracm_path"
    "$autoj_path"
    "$d4_st_5_l1_resumm"
)

### multi-agent exp 400 step
# Five checkpoints saved at step 400, one per feedback source named in the
# directory (qwen / internlm2 / claude / gpt4 / our_no_summarization).
# NOTE: `models` is reassigned further down this script.
metacritique_root=/cpfs02/llm/shared/public/lantian/exp/metacritique_step_train_exp
resumm_run=aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm

qwen_400_path="${metacritique_root}/qwen_feedback_step_400/${resumm_run}_qwen_feedback/400_hf_ckpt"
internlm2_400_path="${metacritique_root}/internlm2_feedback_step_400/${resumm_run}_internlm_feedback/400_hf_ckpt"
claude_400_path="${metacritique_root}/claude_feedback_step_400/${resumm_run}_claude_feedback/400_hf_ckpt"
gpt4_400_path="${metacritique_root}/gpt4_feedback_step_400/${resumm_run}_gpt_feedback/400_hf_ckpt"
our_400_path="${metacritique_root}/our_no_summarization_step_400/${resumm_run}_our_feedback/400_hf_ckpt"

models=(
    "$qwen_400_path"
    "$internlm2_400_path"
    "$claude_400_path"
    "$gpt4_400_path"
    "$our_400_path"
)

# Same five feedback sources as the 400-step group, at step 300.
# NOTE: `models` is reassigned further down this script.
metacritique_root=/cpfs02/llm/shared/public/lantian/exp/metacritique_step_train_exp
resumm_run=aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm

qwen_300_path="${metacritique_root}/qwen_feedback_step_300/${resumm_run}_qwen_feedback/300_hf_ckpt"
internlm2_300_path="${metacritique_root}/internlm2_feedback_step_300/${resumm_run}_internlm_feedback/300_hf_ckpt"
claude_300_path="${metacritique_root}/claude_feedback_step_300/${resumm_run}_claude_feedback/300_hf_ckpt"
gpt4_300_path="${metacritique_root}/gpt4_feedback_step_300/${resumm_run}_gpt_feedback/300_hf_ckpt"
our_300_path="${metacritique_root}/our_no_summarization_step_300/${resumm_run}_our_feedback/300_hf_ckpt"

models=(
    "$qwen_300_path"
    "$internlm2_300_path"
    "$claude_300_path"
    "$gpt4_300_path"
    "$our_300_path"
)



# Ablation study on crucial information.
# Each checkpoint name carries one of the markers no_ref / no_criteria /
# no_task / no_all; the launch loop at the bottom of this script matches on
# those markers to choose the prompt flags.
# NOTE: `models` is reassigned further down this script.
ablation_prefix=/cpfs02/llm/shared/public/lantian/exp/20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm
resumm_run=aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm

resumm_no_ref="${ablation_prefix}_no_ref_333_epoch_2_0703/${resumm_run}_no_ref/333_hf_ckpt"
resumm_no_criteria="${ablation_prefix}_no_criteria_275_epoch_2_0703/${resumm_run}_no_criteria/275_hf_ckpt"
resumm_no_task="${ablation_prefix}_no_task_376_epoch_2_0703/${resumm_run}_no_task/376_hf_ckpt"
resumm_no_all="${ablation_prefix}_no_all_215_epoch_2_0703/${resumm_run}_no_all/215_hf_ckpt"

models=(
    "$resumm_no_all"
    "$resumm_no_criteria"
    "$resumm_no_task"
    "$resumm_no_ref"
)

##### Test baselines: llama-3.1-8b-instruct, llama-3.1-70b-instruct, qwen2-7b-instruct, qwen2-72b-instruct
# models=("qwen2-72b-instruct" "llama-3.1-70b-instruct")
# models=("llama-3.1-70b-instruct")

# History of individual run selections. Every line overwrites `models`, so
# within this group only the last uncommented assignment survives.
exp_root=/cpfs02/llm/shared/public/lantian/exp

models=("$d4_st_5_l1_resumm" "$d4_st_5_l1_resumm" "$d4_st_5_l1_resumm" "$d4_st_5_l1_resumm")
models=("qwen2-72b-instruct")
models=("$resumm_no_criteria")
models=("${exp_root}/202409_13_tigerscore/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_tigerscore/60_hf_ckpt")
models=("tigerscore")
models=("${exp_root}/transfer_from_tos/llama_3_1_critique_sft_baseline_new")
models=("llama-3-70b-instruct")
models=("${exp_root}/transfer_from_tos/llama_3_critique_sft_baseline")
models=("autoj-13b")
models=("ultracm")
models=("${exp_root}/transfer_from_tos/llama_3_critique_sft_baseline_new")
models=("${exp_root}/transfer_from_tos/llama_3_critique_sft_baseline_new_new")
models=("${exp_root}/promethues/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_promethues/540_hf")
# models=(/cpfs02/llm/shared/public/lantian/exp/promethues_no_reference/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_promethues/376_hf)



# Batch 1 of 3: eight checkpoints tagged d1/d2 with 1000/2000/4000/8000 or no
# size tag in the directory name (presumably dataset variant and training-set
# size -- confirm). Overwritten by the `models=` assignments that follow.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d1_st_5_l1_resumm_1000_step_22_epoch_2/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v1_st_5_l1_resumm_1000/22_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d1_st_5_l1_resumm_2000_step_33/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v1_st_5_l1_resumm_2000/33_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d1_st_5_l1_resumm_4000_step_55/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v1_st_5_l1_resumm_4000/55_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d1_st_5_l1_resumm_8000_step_100/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v1_st_5_l1_resumm_8000/100_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d1_st_5_l1_resumm_step_138_epoch_2/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v1_st_5_l1_resumm/138_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d2_st_5_l1_resumm_1000_step_22/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v2_st_5_l1_resumm_1000/22_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d2_st_5_l1_resumm_2000_step_33/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v2_st_5_l1_resumm_2000/33_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d2_st_5_l1_resumm_4000_step_56/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v2_st_5_l1_resumm_4000/56_hf_ckpt"
)
# Batch 2 of 3: eight checkpoints tagged d2/d3/d4 in the directory name.
# Overwritten by the `models=` assignments that follow.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d2_st_5_l1_resumm_8000_step_102/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v2_st_5_l1_resumm_8000/102_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d2_st_5_l1_resumm_step_119/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v2_st_5_l1_resumm/119_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d3_st_5_l1_resumm_1000_step_23/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v3_st_5_l1_resumm_1000/23_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d3_st_5_l1_resumm_2000_step_34/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v3_st_5_l1_resumm_2000/34_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d3_st_5_l1_resumm_4000_step_58/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v3_st_5_l1_resumm_4000/58_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d3_st_5_l1_resumm_8000_step_105/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v3_st_5_l1_resumm_8000/105_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d3_st_5_l1_resumm_step_145/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v3_st_5_l1_resumm/145_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d4_st_5_l1_resumm_1000_step_42/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_1000/42_hf_ckpt"
)
# Batch 3 of 3: the remaining three d4-tagged checkpoints (2000/4000/8000).
# Overwritten by the `models=` assignments that follow.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d4_st_5_l1_resumm_2000_step_75/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_2000/75_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d4_st_5_l1_resumm_4000_step_139/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_4000/139_hf_ckpt"
    "/cpfs02/llm/shared/public/lantian/exp/20240705_sft_7b_critique_nips2024_d4_st_5_l1_resumm_8000_step_268/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_8000/268_hf_ckpt"
)



# Single checkpoint under transfer_from_tos.
# Overwritten by the `models=` assignments that follow.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/transfer_from_tos/llama_3_base_critique_sft_new"
)



# step_train_exp_20240925: six ultracm_* runs (128 down to 4) and two autoj_*
# runs. Overwritten by the `models=` assignments that follow.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_128/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/231_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_64/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/121_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_32/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/66_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_16/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/38_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_8/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/21_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_4/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/17_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/autoj_1/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj/14_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/autoj_2/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj/17_hf"
)


# step_train_exp_20240925: eight prometheus_* runs (100 down to 1).
# Overwritten by the `models=` assignment that follows.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_100/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/1067_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_64/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/686_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_32/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/344_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_16/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/177_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_8/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/95_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_4/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/52_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_2/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/32_hf"
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/prometheus_1/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_prometheus/21_hf"
)

# Active selection: this is the last assignment to `models` before the launch
# loop, so it is the one the loop actually uses.
models=(
    "/cpfs02/llm/shared/public/lantian/exp/step_train_exp_20240925/ultracm_1/aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm/14_hf"
)

# Launch one background `feedback_models.py` job per entry of `models`, each
# pinned to its own GPU through CUDA_VISIBLE_DEVICES. The jobs are started
# with `&` and the script does not wait for them.

# GPU id used for models[0]; models[i] runs on GPU (gpu_offset + i).
# Raise this to shift the whole batch onto higher-numbered devices.
gpu_offset=0

#######################################
# Derive the prompt switches passed to feedback_models.py from a model name.
# Arguments:
#   $1 - model name or checkpoint path
# Globals (written, each "True" or "False"):
#   task      - "False" when the name contains "no_task" or "no_all"
#   criteria  - "False" when the name contains "no_criteria" or "no_all"
#   reference - "False" only when the name contains "no_all"
#######################################
set_prompt_flags() {
    local name=$1

    task=True
    criteria=True
    # NOTE(review): "no_ref" checkpoints are NOT special-cased. The script used
    # to compute reference=False for them and then immediately forced
    # reference=True, so that check never had any effect; the effective
    # behaviour is kept here. Confirm whether no_ref models should instead run
    # without the reference prompt.
    reference=True

    if [[ $name == *"no_criteria"* ]]; then
        # Inference does not contain the criteria prompt.
        criteria=False
    fi

    if [[ $name == *"no_task"* ]]; then
        # Inference does not contain the task prompt.
        task=False
    fi

    if [[ $name == *"no_all"* ]]; then
        # Inference contains none of the task / criteria / reference prompts.
        task=False
        criteria=False
        reference=False
    fi
}

# Iterate over the indices actually present in `models`, so the loop no longer
# needs a hand-maintained upper bound that must match the array length.
for index in "${!models[@]}"
do
    model=${models[$index]}
    if [[ -z $model ]]; then
        # An empty name would otherwise be passed as the --model_name value.
        echo "Skipping empty entry models[$index]" >&2
        continue
    fi
    gpu=$((gpu_offset + index))

    set_prompt_flags "$model"

    echo "Inference $model on GPU[$gpu] | task [$task] | criteria [$criteria] | reference [$reference]"
    CUDA_VISIBLE_DEVICES="$gpu" python feedback_models.py \
        --model_name "$model" \
        --output_dir 20240903_jingduduiqi_temp_7_scaling_exp_20240925 \
        --reference "$reference" \
        --task "$task" \
        --criteria "$criteria" \
        --split dev &
    #CUDA_VISIBLE_DEVICES=$((gpu+1)) python feedback_models.py --model_name "$model" --output_dir 20240811_resumm_debug --reference "$reference" --task "$task" --criteria "$criteria" --split test &
done
