#!/usr/bin/env bash
# Launcher for math_eval.py: parses the positional arguments used by the rest
# of this script.
#
# Positional arguments:
#   $1  PROMPT_TYPE          forwarded as --prompt_type
#   $2  MODEL_NAME_OR_PATH   forwarded as --model_name_or_path
#   $3  OUTPUT_DIR           forwarded as --output_dir
#   $4  temperature          forwarded as --temperature
#   $5  max_tokens           forwarded as --max_tokens_per_call
#   $6  top_p                forwarded as --top_p
#   $7  benchmarks           comma-separated benchmark names (default: list below)
#   $8  num_shots            forwarded as --num_shots
#   $9  shot_source          forwarded as --shot_source
#   $10 OVERWRITE            "true" adds --overwrite; anything else adds nothing
#   $11 N_SAMPLING           forwarded as --n_sampling (default: 1)
#   $12 seed                 forwarded as --seed (default: 0)
#
# -e: abort on the first failing command; -x: trace every command to stderr.
set -ex

PROMPT_TYPE=$1
MODEL_NAME_OR_PATH=$2
OUTPUT_DIR=$3
temperature=$4
max_tokens=$5
top_p=$6
benchmarks=${7:-"gsm8k,math500,minerva_math,gaokao2023en,olympiadbench,college_math,aime24,amc23"}
num_shots=$8
shot_source=$9
OVERWRITE=${10:-false}
N_SAMPLING=${11:-1}
seed=${12:-0}

# Fixed evaluation settings.
SPLIT="test"
NUM_TEST_SAMPLE=-1
DATA_NAME=${benchmarks}

echo "Running evaluation with the following parameters: ${num_shots} shots, shot source: ${shot_source}"

# Turn the boolean-like OVERWRITE argument into the actual CLI flag (or into
# an empty string, which expands to no argument at all).
if [ "$OVERWRITE" = "true" ]; then
    OVERWRITE="--overwrite"
else
    OVERWRITE=""
fi

# split_benchmarks BENCHMARKS TEMPERATURE
#
# Splits the comma-separated BENCHMARKS string into two global arrays:
#   SPECIAL_BENCHMARKS  - aime24, amc23, aime24EX7, aime24EX7_withouthint
#   REGULAR_BENCHMARKS  - every other name
# Whitespace around each name is trimmed and empty entries are skipped, so
# "gsm8k, aime24" and "gsm8k,,aime24" classify the same way as "gsm8k,aime24".
# When TEMPERATURE is zero (0, 0.0, 0.00, ...) the special benchmarks are
# appended to the regular ones and SPECIAL_BENCHMARKS is left empty.
# The raw split is also left in the global BENCHMARK_ARRAY.
split_benchmarks() {
    local names=$1
    local temp=$2
    local entry
    # Kept in a variable so the regex is not subject to quoting rules in [[ =~ ]].
    local zero_re='^0+(\.0*)?$'

    BENCHMARK_ARRAY=()
    REGULAR_BENCHMARKS=()
    SPECIAL_BENCHMARKS=()

    IFS=',' read -ra BENCHMARK_ARRAY <<< "$names"

    for entry in "${BENCHMARK_ARRAY[@]}"; do
        # Strip leading, then trailing, whitespace.
        entry="${entry#"${entry%%[![:space:]]*}"}"
        entry="${entry%"${entry##*[![:space:]]}"}"
        if [[ -z "$entry" ]]; then
            continue
        fi

        case "$entry" in
            aime24 | amc23 | aime24EX7 | aime24EX7_withouthint)
                SPECIAL_BENCHMARKS+=("$entry")
                ;;
            *)
                REGULAR_BENCHMARKS+=("$entry")
                ;;
        esac
    done

    # If temperature is 0, combine the benchmark arrays.
    if [[ "$temp" =~ $zero_re ]]; then
        REGULAR_BENCHMARKS+=("${SPECIAL_BENCHMARKS[@]}")
        SPECIAL_BENCHMARKS=()
    fi
}

split_benchmarks "$benchmarks" "$temperature"



# Sharding configuration: num_samples example indices are divided across
# num_gpus workers; each worker later receives its slice as --start/--end.
# NOTE(review): with num_samples (6) smaller than num_gpus (8) the last two
# workers are given an empty range (start == end) - confirm this is intended.
num_samples=6
# Number of samples handled by each GPU (integer division) plus the leftover.
num_gpus=8
samples_per_gpu=$((num_samples / num_gpus))
remainder=$((num_samples % num_gpus))

# print_run_summary
# Prints the sharding configuration and the run parameters that this script
# actually defines (model path, benchmark list, number of shots).
print_run_summary() {
    echo "Total samples: $num_samples"
    echo "GPUs: $num_gpus"
    echo "Samples per GPU: $samples_per_gpu"
    echo "Remainder: $remainder"
    echo "Model: ${MODEL_NAME_OR_PATH}"
    echo "Running evaluation: ${benchmarks} with ${num_shots} shots"
}

print_run_summary

# # Clean up previous output files
# rm -f output_gpu_*.json
# rm -f output.json

# Wall-clock start, used for the duration report at the end of the script.
time_start=$(date +%s)
# PIDs of the background per-GPU workers, and the next unassigned sample index.
pids=()
current_sample=0
# launch_eval_workers DATA_NAMES
#
# Starts one background math_eval.py process per GPU (0 .. num_gpus-1) for the
# comma-separated benchmark list DATA_NAMES and appends each PID to the global
# `pids` array.
#
# Reads globals: num_gpus, samples_per_gpu, remainder, MODEL_NAME_OR_PATH,
#   OUTPUT_DIR, SPLIT, PROMPT_TYPE, NUM_TEST_SAMPLE, max_tokens, seed,
#   temperature, N_SAMPLING, top_p, num_shots, shot_source, OVERWRITE.
#
# Sample ranges restart at 0 on every call, so each benchmark list is sharded
# over the same [0, num_samples) index range.
launch_eval_workers() {
    local data_names=$1
    local gpu_id gpu_samples start end
    local next_sample=0

    for ((gpu_id = 0; gpu_id < num_gpus; gpu_id++)); do
        # Base share for this GPU; the first `remainder` GPUs take one extra
        # sample so the leftover is spread as evenly as possible.
        gpu_samples=$samples_per_gpu
        if ((gpu_id < remainder)); then
            gpu_samples=$((gpu_samples + 1))
        fi

        # Half-open sample range [start, end) for this GPU.
        start=$next_sample
        end=$((next_sample + gpu_samples))
        next_sample=$end

        echo "GPU $gpu_id: processing samples $start to $end (total: $gpu_samples samples)"

        # Stagger worker start-up.
        sleep 10s

        # The environment assignments must sit on the same logical line as the
        # command (note the trailing backslashes); otherwise they only set
        # shell variables and the worker is not pinned to its GPU.
        TOKENIZERS_PARALLELISM=false \
        CUDA_VISIBLE_DEVICES=$gpu_id \
        python -u math_eval.py \
            --model_name_or_path "${MODEL_NAME_OR_PATH}" \
            --data_name "${data_names}" \
            --output_dir "${OUTPUT_DIR}" \
            --split "${SPLIT}" \
            --prompt_type "${PROMPT_TYPE}" \
            --num_test_sample "${NUM_TEST_SAMPLE}" \
            --max_tokens_per_call "${max_tokens}" \
            --seed "${seed}" \
            --temperature "${temperature}" \
            --n_sampling "${N_SAMPLING}" \
            --top_p "${top_p}" \
            --start "$start" \
            --end "$end" \
            --num_shots "${num_shots}" \
            --shot_source "${shot_source}" \
            --use_vllm \
            --save_outputs \
            ${OVERWRITE:+"$OVERWRITE"} &
        pids+=($!)
    done
}

# NOTE(review): both batches below are started in the background before
# anything is waited on, so when both lists are non-empty two workers share
# each GPU - confirm this is intended.

# Regular benchmarks (every benchmark when temperature is 0).
if [ "${#REGULAR_BENCHMARKS[@]}" -gt 0 ]; then
    REGULAR_BENCHMARKS_STR=$(IFS=,; echo "${REGULAR_BENCHMARKS[*]}")
    launch_eval_workers "$REGULAR_BENCHMARKS_STR"
fi

# Special benchmarks (aime24, amc23 and variants); same --n_sampling as above.
if [ "${#SPECIAL_BENCHMARKS[@]}" -gt 0 ]; then
    SPECIAL_BENCHMARKS_STR=$(IFS=,; echo "${SPECIAL_BENCHMARKS[*]}")
    launch_eval_workers "$SPECIAL_BENCHMARKS_STR"
fi
# wait_for_workers PID...
#
# Waits for every given background PID, even if some of them fail, and logs
# one line per worker. The number of failed workers is stored in the global
# `failed_workers`. Returns 0 when all workers succeeded, 1 otherwise.
wait_for_workers() {
    local pid status
    failed_workers=0

    for pid in "$@"; do
        # `wait` inside an `if` keeps `set -e` from aborting on the first
        # failure, so the remaining workers are still reaped.
        if wait "$pid"; then
            echo "Task with PID $pid completed"
        else
            status=$?
            echo "Task with PID $pid failed with exit status $status" >&2
            failed_workers=$((failed_workers + 1))
        fi
    done

    [ "$failed_workers" -eq 0 ]
}

echo "Waiting for all GPU tasks to complete..."
all_workers_ok=true
wait_for_workers "${pids[@]}" || all_workers_ok=false

if [ "$all_workers_ok" = true ]; then
    echo "All GPU tasks completed. Merging results..."
else
    echo "${failed_workers} GPU task(s) failed; see the messages above." >&2
fi

# Report wall-clock time since time_start in seconds and fractional minutes.
time_end=$(date +%s)
duration=$((time_end - time_start))
duration_minutes=$(awk -v secs="$duration" 'BEGIN {printf "%.1f", secs / 60}')
echo "Total duration: $duration seconds (${duration_minutes} minutes)"

# Propagate worker failure to the caller once timing has been reported.
if [ "$all_workers_ok" != true ]; then
    exit 1
fi

# # Merge all output files
# python -c "
# import json
# import glob

# # Collect all per-GPU output files
# output_files = glob.glob(f'output_gpu_*.json')
# output_files.sort()

# merged_data = []

# for file in output_files:
#     print(f'Merging {file}...')
#     try:
#         with open(file, 'r') as f:
#             data = json.load(f)
#             if isinstance(data, list):
#                 merged_data.extend(data)
#             else:
#                 merged_data.append(data)
#     except Exception as e:
#         print(f'Error reading {file}: {e}')

# for i, data in enumerate(merged_data):
#     data_new = {
#         'i': i,
#         **{key: data[key] for key in data.keys()},
#     }
#     merged_data[i] = data_new

# # Save the merged result
# with open('output_gpu.json', 'w') as f:
#     json.dump(merged_data, f, indent=4, ensure_ascii=False)

# print(f'Merged {len(merged_data)} samples into output.json')
# print('Individual GPU files can be found as output_gpu_*.json')
# "

# save_path=${LOG_FILE}_temperature_${temperature}_seqlen_${response_length}_n_${n_samples_per_prompt}_vllm.json
# mkdir -p $(dirname ${save_path})
# cp "output_gpu.json" ${save_path}
# echo "Merging completed. Final result saved to ${save_path}" 