#!/bin/bash

# Log the four positional arguments exactly as received, one per line.
#   $1 mode   : feedback, correction, comp_feedback, meta_feedback
#   $2 format : sub, obj
#   $3 set    : test, dev
#   $4        : any name for saving the evaluation results
printf '%s\n' \
    "mode: $1" \
    "format: $2" \
    "set: $3" \
    "save results into: $4"
# -----------------------------------------------------------------------------
# Evaluation dispatcher: chooses a run_*.py evaluation script from the
# positional arguments and launches it with the experiment configuration that
# is currently enabled below. Commented-out commands are earlier
# configurations kept for reference.
#
# Arguments:
#   $1  mode   - feedback | correction | comp_feedback | meta_feedback
#   $2  format - "obj" selects the objective evaluation; any other value
#                (including an empty one) selects the subjective evaluation.
#                meta_feedback does not look at it and always passes --obj True.
#   $3  split  - forwarded as --split by feedback/obj, comp_feedback and
#                meta_feedback. feedback/sub and both correction branches
#                hard-code "--split dev" and do not read it.
#                NOTE(review): presumably deliberate because those prediction
#                directories are dev-only - confirm before wiring $3 through.
#   $4         - not read by the dispatcher.
#
# Exit status: that of the last command launched, or 2 when the mode is
# missing/unknown or a required split is missing.
# -----------------------------------------------------------------------------

# Print the expected command line to stderr.
usage() {
    printf 'Usage: %s <feedback|correction|comp_feedback|meta_feedback> <obj|sub> <test|dev> [results_name]\n' \
        "${0##*/}" >&2
}

# Succeed only when a split ($1) was supplied; otherwise explain and return 2.
require_split() {
    if [[ -z "${1-}" ]]; then
        printf 'error: a split (third argument, e.g. test or dev) is required for this mode\n' >&2
        usage
        return 2
    fi
}

main() {
    local mode="${1-}" format="${2-}" split="${3-}"
    local corrector run_no
    local -a correctors
    # Checkpoint name passed to --allow_models by several of the enabled runs.
    local sft_385="_cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt"

    case "$mode" in
        feedback)
            if [[ "$format" == "obj" ]]; then
                require_split "$split" || return
                echo "Inference Objective Evaluation for Feedback Critique Task"
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240903_jingduduiqi_temp_7" --batch_size 16 --split $3 --obj True --allow_models tigerscore
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm_metacritique_train_step_exp" --batch_size 16 --split $3 --obj True --allow_models _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_our_no_summarization_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_our_feedback_400_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_gpt4_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_gpt_feedback_400_hf_ckpt
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --batch_size 16 --split $3 --obj True --allow_models
                python run_feedback.py \
                    --root_dir "../data/CriticBench" \
                    --prediction_dir "../inference/20240903_jingduduiqi_temp_7_scaling_exp_20240925" \
                    --batch_size 16 --split "$split" --obj True \
                    --allow_models _cpfs02_llm_shared_public_lantian_exp_step_train_exp_20240925_ultracm_1_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm_14_hf
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm" --batch_size 16 --split $3 --obj True --allow_models themis
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/jingduduiqi" --batch_size 16 --split $3 --obj True
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn" --batch_size 16 --split $3 --obj True --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v9_from_pretrained_64_hf _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v9_from_pretrained_128_hf _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v9_from_pretrained_192_hf _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v9_from_pretrained_256_hf _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v9_from_pretrained_320_hf
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_lmdeploy042" --batch_size 16 --split $3 --obj True --allow_models _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt
            else
                echo "Inference Subjective Evaluation for Feedback Critique Task"
                #python run_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm_metacritique_train_step_exp" --evaluation_dir "20240704_resumm_metacritique_train_step_exp_evaluation" --batch_size 16 --split $3 --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_claude_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_claude_feedback_400_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_qwen_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_qwen_feedback_400_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_internlm2_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_internlm_feedback_400_hf_ckpt
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_sft_hf_rm_v7_relabel_rm_reverse_0_1_relabel_add_mathcode_0_2_192 _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_add_mathcode_0_1_448 _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240912_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_add_mathcode_0_1_rloo_40
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_sft_hf_rm_v7_relabel_rm_reverse_0_0_relabel_add_mathcode_0_4_mathcode_no_reference_128 _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_sft_hf_rm_v7_relabel_rm_reverse_0_0_relabel_add_mathcode_0_6_mathcode_no_reference_256
                # The split is fixed to dev here; $3 is not consulted.
                python run_feedback.py --fast_mode False \
                    --root_dir "../data/CriticBench" \
                    --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" \
                    --evaluation_dir "20240823_rlhf_singleturn_temp_07" \
                    --batch_size 16 --split dev --obj False \
                    --allow_models \
                    _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_sft_hf_rm_v7_relabel_rm_reverse_0_1_relabel_add_mathcode_0_6_mathcode_no_reference_160 \
                    "$sft_385"
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False --allow_models themis
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False  --allow_models  _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_add_mul_4_rm_reverse_0_0_64  _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_add_mathcode_0_2_256  _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_add_mul_4_rm_reverse_0_1_add_mathcode_256 _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt
                #_cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_add_mathcode_0_1_448 _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_0_8_128 _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_0_4_192
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240826_warmup_40_bsz_64_1_1_prompt_len_32768_rm_v7_relabel_rm_reverse_add_mathcode_64
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn" --evaluation_dir "20240823_rlhf_singleturn" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn_temp_07" --evaluation_dir "20240823_rlhf_singleturn_temp_07" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240820_warmup_40_bsz_64_prompt_len_32768_policy_120_hf
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240823_rlhf_singleturn" --evaluation_dir "20240823_rlhf_singleturn" --batch_size 16 --split dev --obj False  --allow_models _cpfs02_llm_shared_public_lantian_exp_transfer_from_tos_20240820_warmup_40_bsz_64_prompt_len_32768_policy_120_hf
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm_test" --evaluation_dir "20240724_resumm_s1_evaluation_cache_test" --batch_size 16 --split test --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_autoj_26_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj_26_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_ultracm_878_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm_878_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt internlm2-7b-chat themis
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm_test" --evaluation_dir "20240724_resumm_s1_evaluation_cache_dev" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt
                #python run_feedback.py --fast_mode False --root_dir "../data/CriticBench" --prediction_dir "../inference/20240704_resumm" --evaluation_dir "20240724_resumm_s1_evaluation_cache_dev" --batch_size 16 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_autoj_26_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj_26_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_ultracm_878_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm_878_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt internlm2-7b-chat themis
                # python run_feedback.py --fast_mode True --root_dir "../data/CriticBench" --prediction_dir "../inference/20240716_resumm_s2_series_st" --evaluation_dir "20240724_s2_resumm_evaluation_cache_converted" --batch_size 16 --split test --obj False
                # _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_all_215_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_all_215_hf_ckpt
                #--allow_models _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_ultracm_878_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm_878_hf_ckpt
                # _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_autoj_26_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj_26_hf_ckpt
                # _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_gpt4_383_epoch_2_0702_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_pv2_gpt4_383_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt
                # _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_gpt4_383_epoch_2_0702_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_pv2_gpt4_383_hf_ckpt
            fi
            ;;
        correction)
            if [[ "$format" == "obj" ]]; then
                echo "Inference Objective Evaluation for Correction Critique Task"
                # One run per prediction directory; the runs differ only in the
                # "..._correction_by_<name>_..." path component. Run N sends its
                # stdout to N_.txt in the current directory. The split is fixed
                # to dev; $3 is not consulted.
                correctors=(
                    internlm2-20b-chat
                    internlm2-7b-chat
                    s2_add_critictuning
                    llama-3-70b-instruct
                    mixtral-8x7b-instruct
                )
                run_no=0
                for corrector in "${correctors[@]}"; do
                    run_no=$((run_no + 1))
                    python run_correction.py \
                        --root_dir "../data/CriticBench" \
                        --prediction_dir "../inference/relabel_20240901_rlhf_dev_output_correction_by_${corrector}_reverse_feedback_new/" \
                        --batch_size 16 --split dev --obj True \
                        --allow_models "$sft_385" \
                        > "${run_no}_.txt"
                done
            else
                echo "Inference Subjective Evaluation for Correction Critique Task"
                #python run_correction.py --root_dir "../data/CriticBench" --prediction_dir "../inference/test_output_correction_by_s2_add_critictuning" --evaluation_dir "20240704_resumm_correction_evaluation_cache_test" --batch_size 32 --split test --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_autoj_26_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_autoj_26_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240501_sft_7b_critique_ultracm_878_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_critic_ultracm_878_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt internlm2-7b-chat themis

                ### no-ref, no-criteria, no-task, no-all, ours
                #python run_correction.py --root_dir "../data/CriticBench" --prediction_dir "../inference/relabel_20240901_rlhf_dev_output_correction_by_llama-3-70b-instruct_reverse_feedback" --evaluation_dir "20240905_sft_ablation_study_correction_evaluation_with_feedback_llama_3_70b_no_feedback_reverse_feedback" --batch_size 32 --split dev --obj False --allow_models _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_criteria_275_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_criteria_275_hf_ckpt
                # The split is fixed to dev here; $3 is not consulted.
                python run_correction.py \
                    --root_dir "../data/CriticBench" \
                    --prediction_dir "../inference/relabel_20240901_rlhf_dev_output_correction_by_llama-3-70b-instruct_reverse_feedback_new" \
                    --evaluation_dir "20240905_sft_ablation_study_correction_evaluation_with_feedback_llama_3_70b_no_feedback_reverse_feedback_new" \
                    --batch_size 32 --split dev --obj False \
                    --allow_models _cpfs02_llm_shared_public_lantian_exp_promethues_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_promethues_540_hf

                # Spare model names for --allow_models. Both lines below must
                # stay commented: they are names, not commands, and an
                # uncommented line would be executed by the shell.
                #_cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_ref_333_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_ref_333_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_task_376_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_task_376_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_all_215_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_all_215_hf_ckpt_cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_claude_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_claude_feedback_400_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_qwen_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_qwen_feedback_400_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_metacritique_step_train_exp_internlm2_feedback_step_400_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_internlm_feedback_400_hf_ckpt_cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_385_epoch_2_0701_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_385_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_all_215_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_all_215_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_criteria_275_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_criteria_275_hf_ckpt
                #_cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_ref_333_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_ref_333_hf_ckpt _cpfs02_llm_shared_public_lantian_exp_20240618_sft_7b_critique_nips2024_d4_st_5_l1_resumm_no_task_376_epoch_2_0703_aliyun_Ampere_7B_v1.1_enchance_FT_v1.0.0_s1_rc47_FINAL_critic_v4_st_5_l1_resumm_no_task_376_hf_ckpt
            fi
            ;;
        comp_feedback)
            require_split "$split" || return
            if [[ "$format" == "obj" ]]; then
                echo "Inference Objective Evaluation for Comparison-based Feedback Critique Task"
                python run_comp_feedback.py \
                    --root_dir "../data/criticbench_v1.3" \
                    --prediction_dir "../example_data/prediction_v1.3" \
                    --batch_size 16 --split "$split" --obj True
            else
                echo "Inference Subjective Evaluation for Comparison-based Feedback Critique Task"
                python run_comp_feedback.py \
                    --root_dir "../data/criticbench_v1.3" \
                    --prediction_dir "../example_data/prediction_v1.3" \
                    --evaluation_dir "../example_data/evaluation_v1.3/" \
                    --batch_size 1 --split "$split" --obj False
            fi
            ;;
        meta_feedback)
            # No obj/sub distinction in this mode: --obj True is always passed.
            require_split "$split" || return
            echo "Inference Objective Evaluation for Meta-Feedback Critique Task"
            # python run_meta_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/output_meta_feedback_new_1/" --batch_size 16 --split $3 --obj True --version 1
            # python run_meta_feedback.py --root_dir "../data/CriticBench" --prediction_dir "../inference/output_meta_feedback_new_2/" --batch_size 16 --split $3 --obj True --version 2
            python run_meta_feedback.py \
                --root_dir "../data/CriticBench" \
                --prediction_dir "../inference/" \
                --batch_size 32 --split "$split" --obj True
            ;;
        *)
            # A missing or misspelled mode is reported instead of being a silent no-op.
            if [[ -n "$mode" ]]; then
                printf 'error: unknown mode: %s\n' "$mode" >&2
            fi
            usage
            return 2
            ;;
    esac
}

main "$@"
