#!/bin/bash
# Models under evaluation; every ablation setting is run once per entry.
TARGET_MODELS=(
    "openai"
    "gemini"
    "grok"
    "perplexity"
)

# Optional: cap the number of prompts processed (handy for quick test runs).
# Remove the leading '#' on the next line to turn it on.
# LIMIT="--limit 10"

# Optional: bypass the article cleaning step.
# Remove the leading '#' on the next line to turn it on.
# SKIP_CLEANING="--skip_cleaning"

# Experiment suffix, meant to tell experiment batches apart.
# Per the original notes, the Python script applies special path handling when
# the suffix is "ablation_exp_v2"; any other suffix is said to place results
# under ./results/{target_model}/{exp_name}.
# NOTE(review): the run loop in this file passes the setting name as
# --exp_suffix and never reads this variable -- confirm whether it is still needed.
EXP_SUFFIX='ablation_exp_v2'

# Ablation settings.
#
# Two parallel arrays describe the sweep:
#   SETTINGS[i]      - extra command-line flags handed to the Python script
#   SETTING_NAMES[i] - human-readable label for the same setting
#
# For deepresearch_bench_ablation_study.py the baseline (all features on) is
# what runs when no extra flag is given. Individual features are switched off
# with flags such as --no_dynamic_criteria, --no_reference and --no_weights;
# --vanilla_prompt selects the vanilla prompt mode.
#
# To extend the sweep, add another add_ablation_setting line below.
SETTINGS=()
SETTING_NAMES=()

# add_ablation_setting NAME FLAGS
# Appends one entry to both arrays so a label and its flags always share an index.
add_ablation_setting() {
    SETTING_NAMES+=("$1")
    SETTINGS+=("$2")
}

# 1. Baseline (dynamic_criteria, use_reference, use_weights)
add_ablation_setting "Baseline" ""
# 2. No weights (dynamic_criteria, use_reference)
add_ablation_setting "No_Weights" "--no_weights"
# 3. Pointwise (dynamic_criteria, use_weights)
add_ablation_setting "Pointwise" "--no_reference"
# 4. Static merged prompt (use_reference, use_weights)
add_ablation_setting "Static_Criteria_Merged" "--no_dynamic_criteria"
# 5. Vanilla prompt
add_ablation_setting "Vanilla_Prompt" "--vanilla_prompt"
# 6. Pointwise with static prompt (author's note: currently falls back to the
#    dynamic pointwise structure)
add_ablation_setting "Pointwise_Static_Fallback" "--no_reference --no_dynamic_criteria"
# 7. Pointwise, no weights
add_ablation_setting "Pointwise_No_Weights" "--no_reference --no_weights"
# 8. Static merged, no weights
add_ablation_setting "Static_Criteria_Merged_No_Weights" "--no_dynamic_criteria --no_weights"
# 9. Pointwise, static, no weights
add_ablation_setting "Pointwise_Static_Fallback_No_Weights" "--no_reference --no_dynamic_criteria --no_weights"


# --- Execution Part ---
# Runs ./deepresearch_bench_ablation_study.py once for every
# (ablation setting, target model) pair.
#
# Reads:   SETTINGS, SETTING_NAMES, TARGET_MODELS (arrays defined above),
#          LIMIT and SKIP_CLEANING (optional, may be unset).
# Exit:    0 when every run succeeded, 1 when the configuration is invalid,
#          the script directory cannot be entered, or at least one run failed.

# The Python script is invoked through a relative path, so work from the
# directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
if [[ -z "$SCRIPT_DIR" ]]; then
    echo "Error: could not determine the directory of this script." >&2
    exit 1
fi
if [[ "$PWD" != "$SCRIPT_DIR" ]]; then
    echo "Changing to the script directory: $SCRIPT_DIR"
    cd "$SCRIPT_DIR" || exit 1
fi

echo "Working directory: $PWD"

# SETTINGS and SETTING_NAMES are indexed in parallel; a length mismatch would
# silently run settings under the wrong (or an empty) name.
if (( ${#SETTINGS[@]} != ${#SETTING_NAMES[@]} )); then
    echo "Error: SETTINGS has ${#SETTINGS[@]} entries but SETTING_NAMES has ${#SETTING_NAMES[@]}." >&2
    exit 1
fi

# build_python_cmd TARGET_MODEL SETTING_ARGS SETTING_NAME
# Fills the global array PYTHON_CMD with the full command line for one run.
#   TARGET_MODEL  - passed as a single positional argument, never re-parsed
#   SETTING_ARGS  - whitespace-separated flags (may be empty)
#   SETTING_NAME  - passed as the value of --exp_suffix
# LIMIT and SKIP_CLEANING, when set and non-empty, are appended after being
# split on whitespace. Values are NOT evaluated by the shell, so quotes or
# other shell syntax inside them are taken literally.
build_python_cmd() {
    local target_model=$1
    local setting_args=$2
    local setting_name=$3
    local -a words=()

    PYTHON_CMD=(python -u ./deepresearch_bench_ablation_study.py "$target_model")

    # Experiment-specific flags, if any.
    if [[ -n "$setting_args" ]]; then
        read -r -a words <<< "$setting_args"
        PYTHON_CMD+=("${words[@]}")
    fi

    # The setting name (not the EXP_SUFFIX variable) is what gets passed as
    # --exp_suffix, so each setting is labelled separately.
    PYTHON_CMD+=(--exp_suffix "$setting_name")

    # Optional global parameters.
    if [[ -n "${LIMIT:-}" ]]; then
        read -r -a words <<< "$LIMIT"
        PYTHON_CMD+=("${words[@]}")
    fi
    if [[ -n "${SKIP_CLEANING:-}" ]]; then
        read -r -a words <<< "$SKIP_CLEANING"
        PYTHON_CMD+=("${words[@]}")
    fi
}

# "<setting>/<model>" labels of runs whose Python process exited non-zero.
FAILED_RUNS=()

for ((i = 0; i < ${#SETTINGS[@]}; i++)); do
    SETTING_ARGS="${SETTINGS[i]}"
    SETTING_NAME="${SETTING_NAMES[i]}"

    echo "==============================================================="
    echo "Running Experiment Setting: $SETTING_NAME"
    echo "Arguments: $SETTING_ARGS"
    echo "==============================================================="

    for TARGET_MODEL in "${TARGET_MODELS[@]}"; do
        echo "Running for Target Model: $TARGET_MODEL, Setting: $SETTING_NAME"

        build_python_cmd "$TARGET_MODEL" "$SETTING_ARGS" "$SETTING_NAME"

        # Log the command in a shell-quoted form that can be pasted back.
        printf 'Executing:'
        printf ' %q' "${PYTHON_CMD[@]}"
        printf '\n'

        # Run the array directly (no eval). A failed run is recorded and the
        # sweep continues with the remaining combinations.
        if "${PYTHON_CMD[@]}"; then
            echo "Experiment for Model $TARGET_MODEL with Setting $SETTING_NAME finished."
        else
            RUN_STATUS=$?
            echo "Experiment for Model $TARGET_MODEL with Setting $SETTING_NAME FAILED (exit code $RUN_STATUS)." >&2
            FAILED_RUNS+=("$SETTING_NAME/$TARGET_MODEL")
        fi
        echo "--------------------------------------------------"
    done

    echo "All models for Experiment Setting $SETTING_NAME finished."
    echo "==============================================================="
done

if (( ${#FAILED_RUNS[@]} > 0 )); then
    echo "${#FAILED_RUNS[@]} ablation run(s) failed:" >&2
    printf '  %s\n' "${FAILED_RUNS[@]}" >&2
    exit 1
fi

echo "All ablation experiments for all models finished."