#!/bin/bash

# set -e

# First positional argument: the requested simulation count as given by the caller.
# NOTE(review): the argument is not validated; when it is missing, the arithmetic
# below yields -1 and the temperature check falls through to 0.6.
NUM_SIM_ARG="$1"

# The value forwarded as --num_simulations is one less than the argument
# (presumably the initial generation is not counted as a simulation — confirm).
NUM_SIMULATIONS=$(( NUM_SIM_ARG - 1 ))

# Comma-separated model list and the matching comma-separated per-model values,
# forwarded as --answer_models / --answer_model_probs.
MODEL_PROBS="1,1"
ANSWER_MODELS="gpt-4o-2024-08-06,us.anthropic.claude-3-5-sonnet-20240620-v1:0"

# Sampling temperature: 0.6 by default, 0.0 when exactly one simulation was requested.
TEMPERATURE="0.6"
if [ "$NUM_SIM_ARG" -eq 1 ]; then
    TEMPERATURE="0.0"
fi

# Manual override kept for reference (disabled):
# NUM_SIMULATIONS=0

# Search algorithm name forwarded as --mcts_algo.
MCTS_ALGO="thompson"

# Benchmark release forwarded as --release_version.
RELEASE_VERSION="release_v4"

# File whose lines are fed to GNU parallel; each line becomes one --idx value.
INDICES_FILE="llm_mcts/tasks/live_code_bench_code_generation/release_v4_202408_202411_indeces.txt"

# Number of jobs to run concurrently (parallel -j).
N_JOBS=60

# Experiment identifier built from the model list, the algorithm and the
# simulation count; later used as --experiment_name and as a log directory name.
printf -v EXPERIMENT_NAME 'release_4_%s_%s_nsim%s' \
    "$ANSWER_MODELS" "$MCTS_ALGO" "$NUM_SIMULATIONS"

# Formats a duration given in whole seconds as minutes with two decimal places.
# The fraction is truncated, not rounded (125 -> "2.08"), and a leading zero is
# always printed (30 -> "0.50").
#   $1 - elapsed seconds (non-negative integer)
format_minutes() {
    local seconds=$1
    printf '%d.%02d' "$((seconds / 60))" "$(((seconds % 60) * 100 / 60))"
}

# Runs one pipeline stage: feeds every line of INDICES_FILE to GNU parallel,
# which launches the given Python script once per line, then prints how long
# the stage took.
#   $1  - label shown in the "Elapsed time (...)" message
#   $2  - path of the Python script to run
#   $3+ - extra arguments appended after the common ones
# Globals read: INDICES_FILE, N_JOBS, EXPERIMENT_NAME, RELEASE_VERSION, PYTHONPATH
run_stage() {
    local label=$1 script=$2
    shift 2

    local started finished
    started=$(date +%s)
    # {} is parallel's replacement string: it is substituted with the input line.
    # A non-zero exit is reported but deliberately does not abort the script, so
    # the following stages still run.
    PYTHONPATH=".:$PYTHONPATH" parallel -j "$N_JOBS" \
        python "$script" \
        --experiment_name "$EXPERIMENT_NAME" \
        --idx {} \
        --release_version "$RELEASE_VERSION" \
        "$@" < "$INDICES_FILE" \
        || echo "Warning: stage '$label' exited with status $?" >&2
    finished=$(date +%s)

    echo "Elapsed time ($label): $(format_minutes "$((finished - started))") minutes"
}

# Stage 1: generate answers for every listed problem index.
run_stage "run" \
    scripts/live_code_bench_code_generation/run_live_code_bench_code_generation.py \
    --answer_models "$ANSWER_MODELS" \
    --answer_model_probs "$MODEL_PROBS" \
    --temperature "$TEMPERATURE" \
    --num_simulations "$NUM_SIMULATIONS" \
    --mcts_algo "$MCTS_ALGO"

# Stage 2: run the evaluation script for every index.
run_stage "evaluate" \
    scripts/live_code_bench_code_generation/evaluate_live_code_bench_code_generation.py

# Stage 3: run the submission-building script for every index.
run_stage "make submission" \
    scripts/live_code_bench_code_generation/make_submission_live_code_bench_code_generation.py

# Run the result-gathering script for this experiment
# (presumably it produces the prediction.jsonl read afterwards — confirm).
GATHER_SCRIPT="scripts/live_code_bench_code_generation/gather_results_live_code_bench_code_generation.py"
python "$GATHER_SCRIPT" --experiment_name "$EXPERIMENT_NAME"

# Prints the number of problems listed in an indices file, i.e. the number of
# non-blank lines. Unlike `wc -l` (which counts newline characters), the result
# does not depend on whether the last line ends with a newline.
#   $1 - path to the indices file
count_problems() {
    grep -c '[^[:space:]]' -- "$1"
}

TOTAL_PROBLEMS=$(count_problems "$INDICES_FILE")
TOTAL_PROBLEMS=${TOTAL_PROBLEMS:-0}  # grep prints nothing when the file is unreadable

# Number of records in prediction.jsonl whose private_tests.score equals 1.
SOLVED_PROBLEMS=$(jq -s '[.[] | select(.private_tests.score==1)] | length ' "logging/live_code_bench_code_generation/$EXPERIMENT_NAME/prediction.jsonl")
SOLVED_PROBLEMS=${SOLVED_PROBLEMS:-0}  # jq prints nothing when the file is missing

if [ "$TOTAL_PROBLEMS" -gt 0 ]; then
    # Percentage with two decimals, truncated, computed in hundredths of a percent.
    SOLVED_HUNDREDTHS=$((10000 * SOLVED_PROBLEMS / TOTAL_PROBLEMS))
    SOLVED_PERCENT=$(printf '%d.%02d' "$((SOLVED_HUNDREDTHS / 100))" "$((SOLVED_HUNDREDTHS % 100))")
    echo "Solved $SOLVED_PROBLEMS out of $TOTAL_PROBLEMS problems ($SOLVED_PERCENT%)"
else
    # Avoid dividing by zero when no problem indices could be read.
    echo "Solved $SOLVED_PROBLEMS out of $TOTAL_PROBLEMS problems (percentage unavailable: no problems listed)"
fi
