# ---------------------------------------------------------------------------
# Benchmark configuration.
# ---------------------------------------------------------------------------

# Name of the benchmark; passed to --benchmark and used in the results path.
benchmark="detectrl"

# Directory prefix under which the base/reference model names are resolved.
# Replace the placeholder with a real location before running.
model_path="YOUR_MODEL_PATH"

# Directory holding the Task1 test files (used by the multi_domain and
# multi_llms tasks).
Task1_path="DetectRL/Benchmark/Tasks/Task1"

# Whitespace-separated lists; the loops below iterate over each word.
methods="binoculars iRM fast_detect_gpt lastde_doubleplus"
tasks="multi_domain multi_llms multi_attack human_writing"
base_models="meta-llama/Llama-3.2-1B-Instruct"

# Print the comma-separated list of test files for the task named in $1.
# Reads the global Task1_path. Returns 1 and prints nothing for an unknown
# task, so the caller never reuses a stale list from a previous iteration.
# Every entry, including the last, is followed by a comma; this mirrors the
# historical format of this script.
# NOTE(review): confirm the consumer tolerates the trailing comma.
test_data_path_for() {
    case "$1" in
        multi_domain)
            printf '%s,' \
                "$Task1_path/multi_domains_arxiv_test.json" \
                "$Task1_path/multi_domains_xsum_test.json" \
                "$Task1_path/multi_domains_writing_prompt_test.json" \
                "$Task1_path/multi_domains_yelp_review_test.json"
            ;;
        multi_llms)
            printf '%s,' \
                "$Task1_path/multi_llms_ChatGPT_test.json" \
                "$Task1_path/multi_llms_Claude-instant_test.json" \
                "$Task1_path/multi_llms_Google-PaLM_test.json" \
                "$Task1_path/multi_llms_Llama-2-70b_test.json"
            ;;
        multi_attack)
            printf '%s,' \
                DetectRL/Benchmark/Benchmark_Data/Direct_Prompt/direct_prompt_test.json \
                DetectRL/Benchmark/Benchmark_Data/Prompt_Attacks/prompt_attacks_llm_test.json \
                DetectRL/Benchmark/Benchmark_Data/Paraphrase_Attacks/paraphrase_attacks_llm_test.json \
                DetectRL/Benchmark/Benchmark_Data/Perturbation_Attacks/perturbation_attacks_llm_test.json \
                DetectRL/Benchmark/Benchmark_Data/Data_Mixing/data_mixing_attacks_test.json
            ;;
        human_writing)
            printf '%s,' \
                DetectRL/Benchmark/Tasks/Task4/direct_prompt_test.json \
                DetectRL/Benchmark/Tasks/Task4/paraphrase_attacks_human_test.json \
                DetectRL/Benchmark/Tasks/Task4/perturbation_attacks_human_test.json \
                DetectRL/Benchmark/Tasks/Task4/data_mixing_attacks_test.json
            ;;
        paraphrase_human)
            printf '%s,' \
                DetectRL/Benchmark/Benchmark_Data/Paraphrase_Attacks_Human/paraphrase_dipper_human_test.json \
                DetectRL/Benchmark/Benchmark_Data/Paraphrase_Attacks_Human/paraphrase_polish_human_test.json \
                DetectRL/Benchmark/Benchmark_Data/Paraphrase_Attacks_Human/paraphrase_back_translation_human_test.json
            ;;
        perturbation_human)
            printf '%s,' \
                DetectRL/Benchmark/Benchmark_Data/Perturbation_Attacks_Human/perturbation_character_human_test.json \
                DetectRL/Benchmark/Benchmark_Data/Perturbation_Attacks_Human/perturbation_sent_human_test.json \
                DetectRL/Benchmark/Benchmark_Data/Perturbation_Attacks_Human/perturbation_word_human_test.json
            ;;
        *)
            return 1
            ;;
    esac
}

# Print the reference model paired with the base model named in $1.
# Returns 1 and prints nothing when no pairing is known.
# The table is explicit because the naming is not uniform
# (e.g. gemma-3-1b-it pairs with gemma-3-1b-pt, not gemma-3-1b).
ref_model_for() {
    case "$1" in
        google/gemma-2-2b-it)             printf '%s' 'google/gemma-2-2b' ;;
        google/gemma-2-9b-it)             printf '%s' 'google/gemma-2-9b' ;;
        google/gemma-2b-it)               printf '%s' 'google/gemma-2b' ;;
        google/gemma-3-1b-it)             printf '%s' 'google/gemma-3-1b-pt' ;;
        meta-llama/Llama-3.2-1B-Instruct) printf '%s' 'meta-llama/Llama-3.2-1B' ;;
        meta-llama/Llama-3.2-3B-Instruct) printf '%s' 'meta-llama/Llama-3.2-3B' ;;
        Qwen/Qwen2.5-1.5B-Instruct)       printf '%s' 'Qwen/Qwen2.5-1.5B' ;;
        Qwen/Qwen2.5-0.5B-Instruct)       printf '%s' 'Qwen/Qwen2.5-0.5B' ;;
        Qwen/Qwen2-1.5B-Instruct)         printf '%s' 'Qwen/Qwen2-1.5B' ;;
        Qwen/Qwen2-0.5B-Instruct)         printf '%s' 'Qwen/Qwen2-0.5B' ;;
        *)                                return 1 ;;
    esac
}

# Run every (method, task, base model) combination.
# Reads the globals methods, tasks, base_models (whitespace-separated lists),
# model_path and benchmark. Results are written by the Python scripts under
# results/<benchmark>/<task>/<method>/<base_model>.
# Unknown tasks, and unknown base models for methods that need a distinct
# reference model, are skipped with a warning on stderr.
run_benchmark() {
    # The three lists are intentionally unquoted: each word is one item.
    for method in $methods; do
        for task in $tasks; do
            if ! test_data_path=$(test_data_path_for "$task"); then
                printf 'warning: unknown task "%s"; skipping\n' "$task" >&2
                continue
            fi

            for base_model in $base_models; do
                # Reset on every iteration so an unmapped model never inherits
                # the reference model of the previous one.
                ref_model=$(ref_model_for "$base_model") || ref_model=''

                printf '%s %s %s %s\n' "$method" "$task" "$base_model" "$ref_model"

                save_path="results/$benchmark/$task/$method/$base_model"

                # NOTE(review): the first two methods launch
                # irm/main/double_models.py while the fallback launches
                # IRM-detection/python/double_models.py; paths kept as found.
                case "$method" in
                    fast_detect_gpt)
                        # Uses the base model as its own reference model.
                        python irm/main/double_models.py \
                            --method "$method" \
                            --test_data_path "$test_data_path" \
                            --benchmark "$benchmark" \
                            --base_model "$model_path/$base_model" \
                            --ref_model "$model_path/$base_model" \
                            --save_path "$save_path" \
                            --discrepancy_analytic
                        ;;
                    lastde_doubleplus)
                        # Uses the base model as its own reference model.
                        python irm/main/double_models.py \
                            --method "$method" \
                            --test_data_path "$test_data_path" \
                            --benchmark "$benchmark" \
                            --base_model "$model_path/$base_model" \
                            --ref_model "$model_path/$base_model" \
                            --save_path "$save_path" \
                            --skip_fail
                        ;;
                    *)
                        # These methods pair the base model with a separate
                        # reference model, so a missing mapping is fatal for
                        # this combination only.
                        if [ -z "$ref_model" ]; then
                            printf 'warning: no reference model known for "%s"; skipping %s/%s\n' \
                                "$base_model" "$method" "$task" >&2
                            continue
                        fi
                        python IRM-detection/python/double_models.py \
                            --method "$method" \
                            --test_data_path "$test_data_path" \
                            --benchmark "$benchmark" \
                            --base_model "$model_path/$base_model" \
                            --ref_model "$model_path/$ref_model" \
                            --save_path "$save_path"
                        ;;
                esac
            done
        done
    done
}

run_benchmark