#!/bin/bash
# Launches math_eval.py once per GPU in the background, then merges the
# per-GPU result files with merge_jsons.py.
#
# Usage:
#   <this script> PROMPT_TYPE MODEL_NAME_OR_PATH OUTPUT_DIR BENCHMARK N_SAMPLING \
#       [temperature] [max_tokens] [top_p] [num_shots] [shot_source] \
#       [OVERWRITE] [seed] [MODEL_NAME]
#
# errexit: stop on the first failing command; xtrace: echo each command.
set -o errexit -o xtrace

# Required positional arguments (no defaults: they stay empty when omitted).
PROMPT_TYPE="$1"            # forwarded as --prompt_type
MODEL_NAME_OR_PATH="$2"     # forwarded as --model_name_or_path
OUTPUT_DIR="$3"             # results are written below ${OUTPUT_DIR}/temp_res
BENCHMARK="$4"              # forwarded as --data_name
N_SAMPLING="$5"             # total sample count, split across the GPUs

# Optional positional arguments with their defaults.
temperature="${6:-0.0}"
max_tokens="${7:-2048}"             # forwarded as --max_tokens_per_call
top_p="${8:-1.0}"
num_shots="${9:-0}"
shot_source="${10:-""}"
OVERWRITE="${11:-false}"            # the literal "true" enables --overwrite
seed="${12:-0}"                     # base seed; each GPU adds its own index
MODEL_NAME="${13:-"gpt-3.5-turbo"}" # names the sub-directory under temp_res

# Settings that are not configurable from the command line.
SPLIT="test"
NUM_TEST_SAMPLE="-1"
num_gpus="4"

# Log the run configuration, one setting per line, before any work starts.
printf '%s\n' \
    "Running evaluation with the following parameters:" \
    "Model: $MODEL_NAME_OR_PATH" \
    "Benchmark: $BENCHMARK" \
    "N_sampling: $N_SAMPLING" \
    "Temperature: $temperature" \
    "Number of shots: $num_shots" \
    "Shot source: $shot_source"

# Translate the OVERWRITE setting into the optional CLI switch: only the exact
# string "true" yields --overwrite; any other value leaves the flag empty.
case "$OVERWRITE" in
    true)
        OVERWRITE_FLAG="--overwrite"
        ;;
    *)
        OVERWRITE_FLAG=""
        ;;
esac

# Split N_SAMPLING across the GPUs: every GPU gets samples_per_gpu samples and
# the first `remainder` GPUs get one extra (applied in the launch loop below).

# Reject empty, negative, or non-numeric values up front; otherwise the
# arithmetic below fails with an obscure error or evaluates arbitrary text.
case "$N_SAMPLING" in
    '' | *[!0-9]*)
        echo "Error: N_SAMPLING must be a positive integer, got '${N_SAMPLING}'" >&2
        exit 1
        ;;
esac

# Force base 10 so a value with leading zeros (e.g. 08) is not read as octal.
total_samples=$((10#${N_SAMPLING}))

# Zero samples would set num_gpus to 0, launch nothing, and merge nothing.
if [ "$total_samples" -eq 0 ]; then
    echo "Error: N_SAMPLING must be greater than zero" >&2
    exit 1
fi

samples_per_gpu=$((total_samples / num_gpus))
remainder=$((total_samples % num_gpus))

if [ "$samples_per_gpu" -eq 0 ]; then
    # Fewer samples than GPUs: use one GPU per sample instead.
    num_gpus=$total_samples
    samples_per_gpu=1
    remainder=0
fi

echo "Total sampling: $N_SAMPLING"
echo "GPUs to use: $num_gpus"
echo "Samples per GPU: $samples_per_gpu"
echo "Remainder: $remainder"

# Start of the wall-clock measurement reported after all tasks finish.
time_start=$(date +%s)

# Create the per-model temp result directory and delete any .json / .jsonl
# files already in it (presumably so the later merge step does not pick up
# results from a previous run -- confirm against merge_jsons.py).
# The :? guard aborts when OUTPUT_DIR is empty, so mkdir/rm can never operate
# on "/temp_res/..." at the filesystem root.
temp_res_dir="${OUTPUT_DIR:?OUTPUT_DIR must be set}/temp_res/${MODEL_NAME}"
mkdir -p -- "$temp_res_dir"

# rm -f already ignores patterns that match nothing; a failure here is a real
# problem (e.g. permissions), so report it instead of silencing it. Cleanup
# stays best-effort and does not abort the run.
rm -f -- "$temp_res_dir"/*.json "$temp_res_dir"/*.jsonl \
    || echo "Warning: could not remove old result files in ${temp_res_dir}" >&2

# Run parallel tasks: start one background math_eval.py process per GPU and
# record each PID in `pids` so the script can wait on them afterwards.
pids=()

for ((gpu_id = 0; gpu_id < num_gpus; gpu_id++)); do
    # Base share for this GPU; the first `remainder` GPUs take one extra sample.
    gpu_samples=$samples_per_gpu
    if [ "$gpu_id" -lt "$remainder" ]; then
        gpu_samples=$((gpu_samples + 1))
    fi

    # Skip if no samples for this GPU
    if [ "$gpu_samples" -eq 0 ]; then
        continue
    fi

    # Each GPU runs with its own seed (base seed + GPU index).
    gpu_seed=$((seed + gpu_id))

    echo "GPU $gpu_id: processing $gpu_samples samples with seed $gpu_seed"

    sleep 1  # Brief delay to avoid conflicts

    # Build the argument list as an array so every value is passed as exactly
    # one argument. In particular an empty shot_source (its default) is passed
    # as an empty string instead of leaving --shot_source without a value.
    eval_args=(
        --model_name_or_path "${MODEL_NAME_OR_PATH}"
        --data_name "${BENCHMARK}"
        --output_dir "${OUTPUT_DIR}/temp_res/${MODEL_NAME}"
        --split "${SPLIT}"
        --prompt_type "${PROMPT_TYPE}"
        --num_test_sample "${NUM_TEST_SAMPLE}"
        --max_tokens_per_call "${max_tokens}"
        --seed "${gpu_seed}"
        --temperature "${temperature}"
        --n_sampling "${gpu_samples}"
        --top_p "${top_p}"
        --num_shots "${num_shots}"
        --shot_source "${shot_source}"
        --use_vllm
        --save_outputs
        --Gpuid "${gpu_id}"
    )
    # The overwrite switch is optional: append it only when it is non-empty.
    if [ -n "${OVERWRITE_FLAG}" ]; then
        eval_args+=("${OVERWRITE_FLAG}")
    fi

    # Run task on GPU: CUDA_VISIBLE_DEVICES restricts the process to this GPU.
    TOKENIZERS_PARALLELISM=false \
    CUDA_VISIBLE_DEVICES="$gpu_id" \
    python -u math_eval.py "${eval_args[@]}" &

    pids+=("$!")
done

# Wait for every background task. `wait` is used as an `if` condition so that
# `set -e` does not abort on the first failure: all jobs are reaped and every
# failure is reported before the script stops.
echo "Waiting for all GPU tasks to complete..."
failed_tasks=0
for pid in "${pids[@]}"; do
    if wait "$pid"; then
        echo "Task with PID $pid completed"
    else
        task_status=$?
        echo "Task with PID $pid failed with exit status $task_status" >&2
        failed_tasks=$((failed_tasks + 1))
    fi
done

# Do not continue with partial results when any task failed.
if [ "$failed_tasks" -gt 0 ]; then
    echo "Error: $failed_tasks GPU task(s) failed; aborting." >&2
    exit 1
fi

echo "All GPU tasks completed. Merging results..."

# Elapsed wall-clock time since time_start, in whole seconds and in minutes
# rounded to one decimal place.
time_end="$(date +%s)"
duration="$((time_end - time_start))"
duration_minutes="$(awk -v secs="$duration" 'BEGIN { printf "%.1f", secs / 60 }')"
printf '%s\n' "Total duration: $duration seconds (${duration_minutes} minutes)"

# Merge all output files found in the per-model temp result directory.
# The merge script location defaults to the original absolute path but can be
# overridden through the MERGE_SCRIPT environment variable, so the launcher
# also works on machines where that path does not exist.
merge_script="${MERGE_SCRIPT:-/chencongliang/project/icl-diversity/limit-of-RLVR/math/outputs/scripts/merge_jsons.py}"

if [ ! -f "$merge_script" ]; then
    echo "Error: merge script not found: ${merge_script} (set MERGE_SCRIPT to override)" >&2
    exit 1
fi

python "$merge_script" \
    --input_dir "${OUTPUT_DIR}/temp_res/${MODEL_NAME}"

echo "Evaluation completed."