#!/bin/bash

# --------------------------------------------------------
# Experiment configuration (edit these values per run)
# --------------------------------------------------------

# Label handed to every stage as --experiment_name.
EXPERIMENT_NAME='test-gpt4o'

# Dataset selection, forwarded as --dataset_name / --split.
DATASET_NAME='princeton-nlp/SWE-bench_Lite_bm25_13K'
SPLIT='test'

# Answer-generation settings (run stage only).
ANSWER_MODELS='gpt-4o-2024-08-06'  # --answer_models
MODEL_PROBS='1'                    # --answer_model_probs
TEMPERATURE='0.0'                  # --temperature
NUM_SIMULATIONS=0                  # --num_simulations
NUM_EXPAND_SAMPLES=0               # --num_expand_samples
INITIAL_EXPAND_SAMPLES=1           # --initial_expand_samples

# Judge settings, forwarded to both the run and the evaluate stage.
JUDGE_MODEL='gpt-4o-2024-08-06'    # --judge_model
JUDGE_TEMPERATURE='0.0'            # --judge_temperature

# Parallelism and the inclusive index range expanded by `seq`; each index
# becomes one job's --idx value.
N_JOBS=30  # number of jobs to run concurrently
START_IDX=0
END_IDX=299

# ========================================================
# Run SWE-bench
# ========================================================
# Fan run_swe_bench.py out over every index in [START_IDX, END_IDX] (inclusive),
# keeping at most N_JOBS processes alive at once. Each number emitted by seq is
# substituted for {} by parallel, then the wall-clock duration is reported.
start_time=$(date +%s)

# Command template handed to parallel; kept in an array so every expansion is
# quoted and the option list stays readable.
run_cmd=(
    python scripts/swe_bench/run_swe_bench.py
    --experiment_name "$EXPERIMENT_NAME"
    --idx '{}'
    --dataset_name "$DATASET_NAME"
    --split "$SPLIT"
    --answer_models "$ANSWER_MODELS"
    --answer_model_probs "$MODEL_PROBS"
    --temperature "$TEMPERATURE"
    --num_simulations "$NUM_SIMULATIONS"
    --num_expand_samples "$NUM_EXPAND_SAMPLES"
    --initial_expand_samples "$INITIAL_EXPAND_SAMPLES"
    --judge_model "$JUDGE_MODEL"
    --judge_temperature "$JUDGE_TEMPERATURE"
)

# "." is prepended to PYTHONPATH (presumably so the script can import project
# modules from the current directory); the ${PYTHONPATH:+...} form avoids a
# dangling ":" when PYTHONPATH is unset. A non-zero status from parallel is
# reported on stderr but does not stop the script, so later stages still run.
seq "$START_IDX" "$END_IDX" \
    | PYTHONPATH=".${PYTHONPATH:+:$PYTHONPATH}" parallel -j "$N_JOBS" "${run_cmd[@]}" \
    || echo "Warning: run stage exited with status $?; some instances may have failed" >&2

end_time=$(date +%s)
elapsed_seconds=$((end_time - start_time))
# Minutes truncated to two decimals using integer arithmetic only: unlike bc
# this keeps the leading zero for durations under one minute ("0.50", not
# ".50") and needs no external tool.
printf -v elapsed_time '%d.%02d' \
    "$((elapsed_seconds / 60))" "$(((elapsed_seconds % 60) * 100 / 60))"
echo "Elapsed time (run): $elapsed_time minutes"


# ========================================================
# Evaluate SWE-bench
# ========================================================
# Fan evaluate_swe_bench.py out over every index in [START_IDX, END_IDX]
# (inclusive), keeping at most N_JOBS processes alive at once. Each number
# emitted by seq is substituted for {} by parallel, then the wall-clock
# duration is reported.
start_time=$(date +%s)

# Command template handed to parallel; kept in an array so every expansion is
# quoted and the option list stays readable.
evaluate_cmd=(
    python scripts/swe_bench/evaluate_swe_bench.py
    --experiment_name "$EXPERIMENT_NAME"
    --idx '{}'
    --dataset_name "$DATASET_NAME"
    --split "$SPLIT"
    --judge_model "$JUDGE_MODEL"
    --judge_temperature "$JUDGE_TEMPERATURE"
)

# "." is prepended to PYTHONPATH (presumably so the script can import project
# modules from the current directory); the ${PYTHONPATH:+...} form avoids a
# dangling ":" when PYTHONPATH is unset. A non-zero status from parallel is
# reported on stderr but does not stop the script, so later stages still run.
seq "$START_IDX" "$END_IDX" \
    | PYTHONPATH=".${PYTHONPATH:+:$PYTHONPATH}" parallel -j "$N_JOBS" "${evaluate_cmd[@]}" \
    || echo "Warning: evaluate stage exited with status $?; some instances may have failed" >&2

end_time=$(date +%s)
elapsed_seconds=$((end_time - start_time))
# Minutes truncated to two decimals using integer arithmetic only: unlike bc
# this keeps the leading zero for durations under one minute ("0.50", not
# ".50") and needs no external tool.
printf -v elapsed_time '%d.%02d' \
    "$((elapsed_seconds / 60))" "$(((elapsed_seconds % 60) * 100 / 60))"
echo "Elapsed time (evaluate): $elapsed_time minutes"

# ========================================================
# Make submission
# ========================================================
# Fan make_submission_swe_bench.py out over every index in
# [START_IDX, END_IDX] (inclusive), keeping at most N_JOBS processes alive at
# once. Each number emitted by seq is substituted for {} by parallel, then the
# wall-clock duration is reported.
start_time=$(date +%s)

# Command template handed to parallel; kept in an array so every expansion is
# quoted and the option list stays readable.
submission_cmd=(
    python scripts/swe_bench/make_submission_swe_bench.py
    --experiment_name "$EXPERIMENT_NAME"
    --idx '{}'
    --dataset_name "$DATASET_NAME"
    --split "$SPLIT"
)

# "." is prepended to PYTHONPATH (presumably so the script can import project
# modules from the current directory); the ${PYTHONPATH:+...} form avoids a
# dangling ":" when PYTHONPATH is unset. A non-zero status from parallel is
# reported on stderr but does not stop the script, so later stages still run.
seq "$START_IDX" "$END_IDX" \
    | PYTHONPATH=".${PYTHONPATH:+:$PYTHONPATH}" parallel -j "$N_JOBS" "${submission_cmd[@]}" \
    || echo "Warning: make submission stage exited with status $?; some instances may have failed" >&2

end_time=$(date +%s)
elapsed_seconds=$((end_time - start_time))
# Minutes truncated to two decimals using integer arithmetic only: unlike bc
# this keeps the leading zero for durations under one minute ("0.50", not
# ".50") and needs no external tool.
printf -v elapsed_time '%d.%02d' \
    "$((elapsed_seconds / 60))" "$(((elapsed_seconds % 60) * 100 / 60))"
echo "Elapsed time (make submission): $elapsed_time minutes"

# ========================================================
# Gather results for evaluation
# ========================================================
# Single, non-parallel invocation of the gather script for this experiment.
# Its exit status becomes the exit status of the whole script because it is
# the last command.
# NOTE(review): unlike the parallel stages, this call does not prepend "." to
# PYTHONPATH - confirm the gather script does not need project-local imports.
python scripts/swe_bench/gather_results_swe_bench.py \
    --experiment_name "$EXPERIMENT_NAME"

