#!/bin/bash

# ---------------------------------------------------------------------------
# Experiment settings. Every value is forwarded verbatim as a command-line
# flag to the Python scripts launched further down in this file.
# ---------------------------------------------------------------------------

# Forwarded as --experiment_name to each script.
EXPERIMENT_NAME='debug'

# Comma-separated model identifiers, forwarded as --answer_models.
ANSWER_MODELS='gpt-4o-2024-08-06,us.anthropic.claude-3-5-sonnet-20240620-v1:0,gemini-1.5-pro-002,gpt-4o-mini'
# Forwarded as --answer_model_probs. Four entries, the same count as
# ANSWER_MODELS (presumably one weight per model -- confirm in the script).
MODEL_PROBS='1,1,1,1'
# Forwarded as --temperature. Also four entries (presumably one per model --
# confirm in the script).
TEMPERATURE='0.7,0.2,0.1,0.05'

# Forwarded as --judge_model / --judge_temperature.
JUDGE_MODEL='gpt-4o-2024-08-06'
JUDGE_TEMPERATURE='0.0'

# Forwarded as --num_simulations, --num_expand_samples and
# --initial_expand_samples respectively.
NUM_SIMULATIONS=2
NUM_EXPAND_SAMPLES=2
INITIAL_EXPAND_SAMPLES=4

# Number of jobs to run concurrently (given to `parallel -j`).
N_JOBS=5
# First and last index handed to `seq`; both ends are inclusive.
START_IDX=0
END_IDX=2

# Disabled: the same fan-out for scripts/run_math_vista.py (currently commented out).
# seq $START_IDX $END_IDX | PYTHONPATH=".:$PYTHONPATH" parallel -j $N_JOBS \
#     python scripts/run_math_vista.py \
#     --experiment_name $EXPERIMENT_NAME \
#     --idx {} \
#     --answer_models $ANSWER_MODELS \
#     --answer_model_probs $MODEL_PROBS \
#     --temperature $TEMPERATURE \
#     --num_simulations $NUM_SIMULATIONS \
#     --num_expand_samples $NUM_EXPAND_SAMPLES \
#     --initial_expand_samples $INITIAL_EXPAND_SAMPLES \
#     --judge_model $JUDGE_MODEL \
#     --judge_temperature $JUDGE_TEMPERATURE

#######################################
# Format a duration given in whole seconds as minutes with two decimal
# places, truncated rather than rounded (5 -> 0.08, 90 -> 1.50).
# Only shell integer arithmetic is used, so the result does not depend on an
# external calculator such as bc being installed, and values below one minute
# keep their leading zero.
# Arguments: $1 - non-negative whole number of seconds
# Outputs:   the formatted value on stdout, without a trailing newline
#######################################
format_minutes() {
    local -r seconds=$1
    printf '%d.%02d' "$((seconds / 60))" "$(((seconds % 60) * 100 / 60))"
}

#######################################
# Run one stage: feed every index from START_IDX to END_IDX (inclusive) to
# `parallel`, which launches the given command once per index, then print the
# stage's wall-clock time in minutes.
# A non-zero exit status from `parallel` is reported on stderr but does NOT
# abort the script, so the following stages still run for whatever succeeded.
# Globals:   START_IDX, END_IDX, N_JOBS, PYTHONPATH (all read)
# Arguments: $1  - stage label used in the log messages
#            $2+ - command template handed to `parallel`; `{}` marks where
#                  each index is inserted (GNU parallel syntax)
# Outputs:   "Elapsed time (<label>): <minutes> minutes" on stdout,
#            a warning on stderr when the stage exits non-zero
#######################################
run_stage() {
    local -r label=$1
    shift
    local started finished status=0

    started=$(date +%s)
    # The current directory is prepended to PYTHONPATH for the launched jobs.
    seq "$START_IDX" "$END_IDX" \
        | PYTHONPATH=".:$PYTHONPATH" parallel -j "$N_JOBS" "$@" \
        || status=$?
    finished=$(date +%s)

    if ((status != 0)); then
        printf 'Warning: stage "%s" exited with status %d; continuing.\n' \
            "$label" "$status" >&2
    fi
    printf 'Elapsed time (%s): %s minutes\n' \
        "$label" "$(format_minutes "$((finished - started))")"
}

# Stage 1: run_swe_bench.py, once per index.
run_stage "run" \
    python scripts/swe_bench/run_swe_bench.py \
    --experiment_name "$EXPERIMENT_NAME" \
    --idx {} \
    --answer_models "$ANSWER_MODELS" \
    --answer_model_probs "$MODEL_PROBS" \
    --temperature "$TEMPERATURE" \
    --num_simulations "$NUM_SIMULATIONS" \
    --num_expand_samples "$NUM_EXPAND_SAMPLES" \
    --initial_expand_samples "$INITIAL_EXPAND_SAMPLES" \
    --judge_model "$JUDGE_MODEL" \
    --judge_temperature "$JUDGE_TEMPERATURE"

# Stage 2: evaluate_swe_bench.py, once per index.
run_stage "evaluate" \
    python scripts/swe_bench/evaluate_swe_bench.py \
    --experiment_name "$EXPERIMENT_NAME" \
    --idx {} \
    --judge_model "$JUDGE_MODEL" \
    --judge_temperature "$JUDGE_TEMPERATURE"

# Stage 3: make_submission_swe_bench.py, once per index.
run_stage "make submission" \
    python scripts/swe_bench/make_submission_swe_bench.py \
    --experiment_name "$EXPERIMENT_NAME" \
    --idx {}

# Final step: invoke gather_results_swe_bench.py once for this experiment.
# The current directory is prepended to PYTHONPATH exactly as for the other
# scripts/swe_bench/*.py launches in this file, so all of them resolve imports
# the same way (NOTE(review): confirm this script needs repo-root imports).
PYTHONPATH=".:$PYTHONPATH" python scripts/swe_bench/gather_results_swe_bench.py \
    --experiment_name "$EXPERIMENT_NAME"

