#!/bin/bash

# Make sure the output locations exist before any job starts:
#   ./results/semantic_guided_search - evaluation results
#   ./logs                           - one log file per model / repeat count
mkdir -p ./results/semantic_guided_search ./logs

# Start-of-run timestamp, formatted as YYYYMMDD_HHMMSS.
# NOTE(review): not referenced anywhere else in the visible script - confirm
# whether the log file names were meant to include it.
timestamp="$(date '+%Y%m%d_%H%M%S')"

# Sampling temperature forwarded to generation.
temperature=1.0

# Batch size used when a single sequence is generated per prompt; it is divided
# by the number of sequences for each run.
base_batch_size=70

# Ratio of num_beams to num_beam_groups.
beam_multiplier=3

# Repeat counts (number of sequences per prompt) to sweep over.
#repeat_counts=(1 3 5 10 30)
repeat_counts=(3)

#######################################
# Run one `python -m lm_eval` invocation with the options that every
# evaluation group in this script shares.
# Arguments:
#   $1 - GPU id, exported to the process as CUDA_VISIBLE_DEVICES
#   $2 - model path / hub id, passed as pretrained=... in --model_args
#   $3 - comma-separated task list for --tasks
#   $4 - value for --limit
#   $5 - value for --batch_size
#   $6 - value for --sampling_method
#   $7 - value for --output_path
#   $8 - value for --gen_kwargs
# Returns:
#   The exit status of the lm_eval process.
#######################################
_run_lm_eval() {
    local gpu_id=$1 model_path=$2 tasks=$3 limit=$4
    local batch_size=$5 sampling_method=$6 output_path=$7 gen_kwargs=$8

    CUDA_VISIBLE_DEVICES="$gpu_id" \
    HF_ALLOW_CODE_EVAL=1 \
    HF_DATASETS_TRUST_REMOTE_CODE=true \
    confirm_run_unsafe_code=true \
    python -m lm_eval \
        --model hf \
        --model_args "pretrained=${model_path},trust_remote_code=True,dtype=bfloat16" \
        --tasks "$tasks" \
        --confirm_run_unsafe_code \
        --limit "$limit" \
        --batch_size "$batch_size" \
        --device cuda \
        --trust_remote_code \
        --apply_chat_template \
        --sampling_method "$sampling_method" \
        --output_path "$output_path" \
        --gen_kwargs "$gen_kwargs"
}

#######################################
# Evaluate one model with one repeat count on one GPU.
#
# Runs three lm_eval invocations in sequence (main task group, the
# bbh/mmlu_pro_plus group, and wmt16) and appends everything they print to
# ./logs/semantic_guided_search_repeat<N>_<model>.log as well as to stdout.
#
# Globals (read):
#   base_batch_size - integer batch size before scaling by num_sequences
#   beam_multiplier - ratio of num_beams to num_beam_groups
#   temperature     - sampling temperature forwarded in --gen_kwargs
# Arguments:
#   $1 - model path / hub id
#   $2 - GPU id
#   $3 - number of sequences to generate per prompt (positive integer)
# Returns:
#   0 if all three lm_eval runs succeeded, otherwise the exit status of the
#   first run that failed; 2 if $3 is not a positive integer.
#######################################
run_model_eval() {
    local model_path=$1
    local gpu_id=$2
    local num_sequences=$3

    local sampling_method="semantic_guided_search"
    local model_name adjusted_batch_size num_beam_groups num_beams
    local log_file output_dir gen_kwargs eval_status

    # The value is used as a divisor and in shell arithmetic below, so reject
    # anything that is not a positive integer up front.
    if ! [[ "$num_sequences" =~ ^[1-9][0-9]*$ ]]; then
        echo "run_model_eval: num_sequences must be a positive integer, got '$num_sequences'" >&2
        return 2
    fi

    # Extract model name from path
    model_name=$(basename -- "$model_path")

    # Keep the effective batch size (batch_size * num_sequences) roughly
    # constant: integer-divide the base batch size, never going below 1.
    adjusted_batch_size=$(( base_batch_size / num_sequences ))
    (( adjusted_batch_size >= 1 )) || adjusted_batch_size=1

    # One beam group per returned sequence; num_beams is beam_multiplier times
    # that. awk is used so a fractional multiplier still works; the product is
    # truncated to an integer.
    num_beam_groups=$num_sequences
    num_beams=$(awk -v groups="$num_beam_groups" -v mult="$beam_multiplier" \
        'BEGIN { printf "%d\n", groups * mult }')

    log_file="./logs/${sampling_method}_repeat${num_sequences}_${model_name}.log"
    # Directory structure - matching the format in temperature_sample.sh
    output_dir="./results/${sampling_method}/repeat${num_sequences}"
    mkdir -p ./logs "$output_dir"

    # Generation settings shared by all three lm_eval invocations.
    gen_kwargs="num_beams=$num_beams,num_beam_groups=$num_beam_groups,num_return_sequences=$num_sequences,temperature=$temperature,do_sample=True,top_p=0.8,top_k=20,max_length=8096,repetition_penalty=1.2,max_gen_toks=4096"

    {
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting evaluation for $model_name on GPU $gpu_id"
        echo "Running evaluation with:"
        echo "- Sampling method: $sampling_method"
        echo "- Model: $model_name"
        echo "- Number of sequences: $num_sequences"
        echo "- Beam multiplier: $beam_multiplier"
        echo "- Adjusted batch size: $adjusted_batch_size (base: $base_batch_size)"
        echo "- Effective batch size: $((adjusted_batch_size * num_sequences))"
        echo "- Number of beams: $num_beams"
        echo "- Number of beam groups: $num_beam_groups"
        echo "- GPU: $gpu_id"
        echo "- Log file: $log_file"
    } | tee -a "$log_file"

    # Run evaluation and log output.
    # This group is a pipeline stage, so it runs in a subshell: the `exit` at
    # the end only ends that subshell and sets the status read back through
    # PIPESTATUS below.
    {
        local rc overall=0

        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Evaluation command started"

        # Each status is saved in a variable before it is logged: a $(date ...)
        # substitution expanded earlier in the same string would replace $?.
        _run_lm_eval "$gpu_id" "$model_path" \
            "arc_challenge_chat,coqa,drop,ifeval,gsm8k,minerva_math_algebra" \
            250 "$adjusted_batch_size" "$sampling_method" "$output_dir" "$gen_kwargs"
        rc=$?
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] lm_eval run 1/3 (output: $output_dir) exit code: $rc"
        # Remember the first non-zero status only.
        (( overall )) || overall=$rc

        _run_lm_eval "$gpu_id" "$model_path" \
            "bbh_cot_zeroshot,mmlu_pro_plus" \
            25 "$adjusted_batch_size" "$sampling_method" "${output_dir}_group" "$gen_kwargs"
        rc=$?
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] lm_eval run 2/3 (output: ${output_dir}_group) exit code: $rc"
        (( overall )) || overall=$rc

        # wmt16
        # NOTE(review): limit and batch size are the literal 70 here, not the
        # adjusted batch size used by the other two runs - confirm intended.
        _run_lm_eval "$gpu_id" "$model_path" \
            "wmt16" \
            70 70 "$sampling_method" "${output_dir}_wmt16" "$gen_kwargs"
        rc=$?
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] lm_eval run 3/3 (output: ${output_dir}_wmt16) exit code: $rc"
        (( overall )) || overall=$rc

        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Evaluation command completed with exit code: $overall"
        exit "$overall"
    } 2>&1 | tee -a "$log_file"
    # Must be read immediately: any further command resets PIPESTATUS.
    eval_status=${PIPESTATUS[0]}

    return "$eval_status"
}

# Main execution logic
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting semantic guided search evaluations"

# Define GPU assignments for models.
# Keys are GPU ids, so each GPU id can appear only once; a repeated key
# overwrites the earlier entry.
declare -A gpu_model_map=(
    ["6"]="Qwen/Qwen2.5-3B-Instruct"
#    ["1"]="meta-llama/Llama-3.2-1B-Instruct"
#    ["2"]="Qwen/Qwen2.5-1.5B-Instruct"
#    ["4"]="meta-llama/Llama-3.2-3B-Instruct"
#    ["3"]="meta-llama/Llama-3.1-8B-Instruct"
#    ["6"]="Qwen/Qwen2.5-7B-Instruct"
)

# Process tasks in parallel across GPUs, but serially on each GPU:
# one background subshell per GPU works through every repeat count in order.
gpu_pids=()
for gpu_id in "${!gpu_model_map[@]}"; do
    (
        model_path="${gpu_model_map[$gpu_id]}"
        gpu_failed=0
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting sequence of evaluations for GPU $gpu_id with model $model_path"

        # Process all repeat combinations sequentially on this GPU.
        # A failed run is reported but does not stop the remaining repeats.
        for repeat in "${repeat_counts[@]}"; do
            if run_model_eval "$model_path" "$gpu_id" "$repeat"; then
                echo "[$(date '+%Y-%m-%d %H:%M:%S')] Completed evaluation with repeat=$repeat on GPU $gpu_id"
            else
                # Save the status first; $(date ...) below would replace $?.
                eval_rc=$?
                echo "[$(date '+%Y-%m-%d %H:%M:%S')] Evaluation with repeat=$repeat on GPU $gpu_id FAILED (exit code: $eval_rc)" >&2
                gpu_failed=1
            fi
        done

        echo "[$(date '+%Y-%m-%d %H:%M:%S')] All evaluations for GPU $gpu_id completed"
        # Subshell status tells the parent whether any run on this GPU failed.
        exit "$gpu_failed"
    ) &
    gpu_pids+=("$!")
done

# Wait for every GPU job individually so each job's exit status is observed
# (a bare `wait` would discard them).
overall_status=0
for pid in "${gpu_pids[@]}"; do
    wait "$pid" || overall_status=1
done

if (( overall_status == 0 )); then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] All evaluations completed!"
else
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Evaluations finished, but at least one run failed - check the logs" >&2
fi
echo "Log files are available in the ./logs directory"
echo "Results are available in the ./results/semantic_guided_search directory"

# Let callers (CI, wrapper scripts) see whether any evaluation failed.
exit "$overall_status"

# Example: run a single task by hand as a quick smoke test (uncomment and adjust as needed).
# All tasks evaluated by this script: arc_challenge_chat,coqa,drop,ifeval,gsm8k,minerva_math_algebra,bbh_cot_zeroshot,mmlu_pro_plus,wmt16
#CUDA_VISIBLE_DEVICES=7 HF_ALLOW_CODE_EVAL=1 HF_DATASETS_TRUST_REMOTE_CODE=true confirm_run_unsafe_code=true python -m lm_eval \
#         --model hf \
#         --model_args pretrained=Qwen/Qwen2.5-3B-Instruct,trust_remote_code=True,dtype=bfloat16 \
#         --tasks bbh_cot_zeroshot \
#         --confirm_run_unsafe_code \
#         --limit 3 \
#         --batch_size 100000 \
#         --device cuda \
#         --trust_remote_code \
#         --apply_chat_template \
#         --log_samples \
#         --sampling_method semantic_guided_search \
#         --output_path ./results/test \
#         --gen_kwargs num_beams=9,num_beam_groups=3,num_return_sequences=3,temperature=1.0,do_sample=True,top_p=0.8,top_k=20,max_length=8096,repetition_penalty=1.2,max_gen_toks=4096