#!/bin/bash

# Model identifiers to evaluate. Each entry is handed to the runner unchanged
# as --model_name. Groups are appended in the order the sweep should run.
models=()

# Anthropic
models+=(
    'claude-3-opus-20240229'
    'claude-3-5-sonnet-20240620'
    'claude-3-5-sonnet-20241022'
    'claude-3-7-sonnet-20250219'
    'claude-opus-4-20250514'
    'claude-sonnet-4-20250514'
)

# DeepSeek
# NOTE(review): the first entry is a Nous Research Hermes id with a
# "deepinfra:" prefix rather than a DeepSeek model; its position is kept so
# the run order stays the same.
models+=(
    'deepinfra:nousresearch/hermes-3-llama-3.1-405b'
    'deepseek-ai/DeepSeek-R1'
    'deepseek-ai/DeepSeek-V3'
    'deepseek/deepseek-r1-zero:free'
)

# Gemini
models+=(
    'google/gemini-2.0-flash-001'
    'google/gemini-2.5-flash-preview'
)

# Grok
models+=('x-ai/grok-3-beta')

# Meta / Llama-based
models+=(
    'meta-llama/Llama-3.3-70B-Instruct-Turbo'
    'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'
    'meta-llama/Llama-4-Scout-17B-16E-Instruct'
    'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
    'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF'
)

# Mistral
models+=(
    'mistralai/mistral-large-2411'
    'mistralai/Mixtral-8x22B-Instruct-v0.1'
)

# OpenAI
models+=(
    'gpt-4-1106-preview'
    'gpt-4.1-2025-04-14'
    'gpt-4.1-mini-2025-04-14'
    'gpt-4.5-preview-2025-02-27'
    'gpt-4o-2024-08-06'
    'gpt-4o-mini-2024-07-18'
)

# Other open-source models
models+=(
    'nousresearch/hermes-3-llama-3.1-70b'
    'Qwen/Qwen2.5-72B-Instruct-Turbo'
    'Qwen/Qwen2.5-Coder-32B-Instruct'
)

# System prompt templates to evaluate. Each path is handed to the runner
# unchanged as --system_prompt_path; every model is run against every entry.
system_prompts=()

# Baseline prompt followed by the two variants under ablations/.
system_prompts+=(
    'prompts/system_prompts/helpful-only_prompt-only_cot.jinja2'
    'prompts/system_prompts/ablations/helpful-only_prompt-only_cot_never-used.jinja2'
    'prompts/system_prompts/ablations/helpful-only_prompt-only_cot_never-trained.jinja2'
)

# This variant lives under ablations4/, not ablations/.
system_prompts+=(
    'prompts/system_prompts/ablations4/helpful-only_prompt-only_cot_no-changes-when-complying.jinja2'
)

# Settings shared by every run in the sweep. Each value is forwarded to
# `python -m src.run` under the flag named beside it.

# Boolean-style switches.
USE_HIDDEN_SCRATCHPAD="true"   # --use_hidden_scratchpad
USE_NEW_CLASSIFIERS="true"     # --use_new_classifiers

# Run sizing.
LIMIT="100"                    # --limit
WORKERS="80"                   # --workers

# Output location.
RESULTS_DIR="./results_paper_suffix"   # --results_dir

#######################################
# Run one evaluation through `python -m src.run`.
# Globals (read):
#   USE_HIDDEN_SCRATCHPAD, USE_NEW_CLASSIFIERS, LIMIT, WORKERS, RESULTS_DIR
# Arguments:
#   $1 - model identifier, passed as --model_name
#   $2 - system prompt template path, passed as --system_prompt_path
#   $3 - value passed as --openai_tag
# Returns:
#   The exit status of the python process.
#######################################
run_evaluation() {
    local model_name=$1
    local prompt_path=$2
    local openai_tag=$3

    # Every expansion is quoted so a value containing whitespace or glob
    # characters still reaches the runner as exactly one argument.
    python -m src.run \
        --model_name "$model_name" \
        --system_prompt_path "$prompt_path" \
        --use_hidden_scratchpad "$USE_HIDDEN_SCRATCHPAD" \
        --use_new_classifiers "$USE_NEW_CLASSIFIERS" \
        --limit "$LIMIT" \
        --workers "$WORKERS" \
        --output_dir ./ \
        --results_dir "$RESULTS_DIR" \
        --force_accept_reject_tags false \
        --anthropic_num_threads 10 \
        --together_num_threads 10 \
        --openrouter_num_threads 20 \
        --free_tier_instruction_format_path "prompts/instruction_formats/free_tier_formats/tags_cot.jinja2" \
        --paid_tier_instruction_format_path "prompts/instruction_formats/paid_tier_formats/tags_cot.jinja2" \
        --openai_tag "$openai_tag" \
        --apply_bon false
}

# Run every model against every system prompt, stopping at the first failure.
for model in "${models[@]}"; do
    # The tag depends only on the model name, so choose it once per model.
    # Models whose name contains "misc" get the second tag; all others get
    # the first.
    # NOTE(review): the tag looks like the name of an environment variable
    # that src.run resolves - confirm against the runner.
    if [[ $model == *"misc"* ]]; then
        OPENAI_TAG="OPENAI_API_KEY2"
    else
        OPENAI_TAG="OPENAI_API_KEY1"
    fi

    for system_prompt in "${system_prompts[@]}"; do
        echo "Running evaluation for model: $model with system prompt: $system_prompt"

        # Test the command directly instead of inspecting $? afterwards, and
        # send the failure diagnostic to stderr.
        if ! run_evaluation "$model" "$system_prompt" "$OPENAI_TAG"; then
            echo "Error running evaluation for model: $model with system prompt: $system_prompt" >&2
            exit 1
        fi
    done
done

# NOTE(review): the message says "finetuning variation" although this script
# sweeps models x system prompts - confirm the wording is intended.
echo "All finetuning variation evaluations completed successfully!"