#!/bin/bash

# ========== Configuration (llmtools_evaluate_llmboost_qwen.py run) ==========

# ---------- Models ----------
MODEL="Qwen/Qwen2.5-7B"           # given to the evaluator as --model and --base_model
EXPLORE_MODEL_A="Qwen/Qwen2.5-7B" # given as --explore_model and --base_explore_model
ADAPTER="lora"

# ---------- Checkpoint identity (these values form the checkpoint/log paths) ----------
TRAIN_DATASET="math_10k"
METHOD_TYPE="explore"
MODEL_NAME="llmboost-qwen-llmtools-7b-7b"
LR_RATE="2e-4"

# Not referenced anywhere else in the visible part of this script.
LR_RATE_A="2e-4"
CUTOFF_LEN=256

# ---------- Explore / decoding settings ----------
EXPLORE_FLAG=1
EXPLORE_LOGITS_FACTOR=100
DECODING="greedy"
TOP_K="-1" # forwarded as --top_k_explore_logits
gpus="0,0" # forwarded verbatim as --gpus

# ---------- Sweep axes (one evaluation per combination) ----------
TEST_DATASETS=(LLMTOOLs)
NUM_EPOCHS_LIST=(2)
CLIP_VALUES=(0.1)
TOPK_LIST=(1)  # forwarded one at a time as --topk_logits
LAMBDAS=(0.3)  # forwarded one at a time as --explore_weight
ALPHAS=(0.1)   # other values previously listed here: 0.5, 0.2, 0.8, 1.0
BETAS=(0)      # other value previously listed here: 0.2
SEEDS=(1)

# ---------- Logging / environment ----------
WANDB_PROJECT="llmtools-qwen"
TOKENIZERS_PARALLELISM=false
export TOKENIZERS_PARALLELISM

# ========== Function: evaluation ==========
#######################################
# Run llmtools_evaluate_llmboost_qwen.py once per (test dataset, lambda) pair
# for a single trained checkpoint.
# Globals (read):
#   TEST_DATASETS, LAMBDAS, METHOD_TYPE, TRAIN_DATASET, MODEL_NAME, ADAPTER,
#   LR_RATE, MODEL, EXPLORE_MODEL_A, EXPLORE_FLAG, EXPLORE_LOGITS_FACTOR,
#   DECODING, TOP_K, WANDB_PROJECT, ALPHA, BETA, gpus
#   SEED   - used only when $4 is omitted
#   GPU_ID - optional; value for CUDA_VISIBLE_DEVICES (default: 6)
# Arguments:
#   $1 - value passed as --topk_logits
#   $2 - epoch count; part of the checkpoint/log paths, passed as --num_epochs
#   $3 - clip value; part of the checkpoint directory name
#   $4 - seed passed as --seed (optional; falls back to the global SEED)
# Outputs:
#   One progress line per run on stdout. The evaluator's stdout and stderr are
#   shown on stdout and also appended to a per-run log file under logs/.
# Returns:
#   Exit status of the last evaluator invocation (0 if nothing was run).
#######################################
function run_eval {
  local topk_logits=$1
  local epochs=$2
  local clip_value=$3
  local seed=${4:-$SEED}
  # The progress line always reported a GPU id; honour GPU_ID when the caller
  # sets it and keep the previously hard-coded device 6 as the default.
  local gpu_id=${GPU_ID:-6}
  local outdir="./trained_models/$METHOD_TYPE/$TRAIN_DATASET/$epochs/${MODEL_NAME}-${clip_value}-${ADAPTER}-${LR_RATE}/"
  local outdir_copilot="${outdir}Copilot-1/"
  local test_dataset lambda_value log_dir log_file
  local -a eval_args
  local rc=0

  for test_dataset in "${TEST_DATASETS[@]}"; do
    for lambda_value in "${LAMBDAS[@]}"; do
      log_dir="logs/$TRAIN_DATASET/$epochs/$METHOD_TYPE/${test_dataset}/${MODEL_NAME}"
      log_file="$log_dir/${MODEL_NAME}-${ADAPTER}-${test_dataset}-${METHOD_TYPE}-${DECODING}-EXPLORE_FLAG-${EXPLORE_FLAG}-Copilot-lambda${lambda_value}.log"
      mkdir -p "$log_dir"

      echo "🧪 [EVAL] TOPK=$topk_logits, lambda=$lambda_value on GPU $gpu_id -> $log_file"

      # Build the argument list as an array so every value stays one word
      # even if it is empty or contains whitespace.
      eval_args=(
        --model "$MODEL"
        --adapter "$ADAPTER"
        --dataset "$test_dataset"
        --base_model "$MODEL"
        --lora_weights "$outdir"
        --explore_model "$EXPLORE_MODEL_A"
        --base_explore_model "$EXPLORE_MODEL_A"
        --explore_lora_weights "$outdir_copilot"
        --explore_flag "$EXPLORE_FLAG"
        --explore_weight "$lambda_value"
        --decoding_method "$DECODING"
        --explore_logits_factor "$EXPLORE_LOGITS_FACTOR"
        --top_k_explore_logits "$TOP_K"
        --topk_logits "$topk_logits"
        --wandb_run_name "eval-${MODEL_NAME}-${EXPLORE_FLAG}-lambda${lambda_value}-seed-${seed}"
        --wandb_project "$WANDB_PROJECT"
        --num_epochs "$epochs"
        --seed "$seed"
        --alpha "$ALPHA"
        --beta "$BETA"
        --gpus "$gpus"
        --model_type "$MODEL_NAME"
      )

      # tee keeps the output on the terminal while actually writing the log
      # file announced above. PYTHONUNBUFFERED keeps output live through the
      # pipe. Appending (-a) because the log name does not encode top-k,
      # alpha, beta, clip value or seed, so sweep runs would otherwise
      # overwrite each other.
      CUDA_VISIBLE_DEVICES="$gpu_id" PYTHONUNBUFFERED=1 \
        python llmtools_evaluate_llmboost_qwen.py "${eval_args[@]}" 2>&1 \
        | tee -a "$log_file"
      # Report the evaluator's status, not tee's.
      rc=${PIPESTATUS[0]}
    done
  done

  return "$rc"
}

# ========== Main logic: evaluation sweep ==========
# Calls run_eval once for every combination of epoch count, clip value,
# top-k, alpha, beta and seed. ALPHA and BETA must remain global loop
# variables: run_eval reads them from the environment, not from its arguments.
eval_failures=0
for NUM_EPOCHS in "${NUM_EPOCHS_LIST[@]}"; do
  for CLIP_VALUE in "${CLIP_VALUES[@]}"; do
    for TOPK in "${TOPK_LIST[@]}"; do
      for ALPHA in "${ALPHAS[@]}"; do
        for BETA in "${BETAS[@]}"; do
          for SEED in "${SEEDS[@]}"; do
            # Quoted so an empty or space-containing value cannot shift the
            # positional arguments. A failed run is counted, not fatal, so
            # the remaining combinations still execute.
            run_eval "$TOPK" "$NUM_EPOCHS" "$CLIP_VALUE" "$SEED" \
              || eval_failures=$((eval_failures + 1))
          done
        done
      done
    done
  done
done

# run_eval is invoked in the foreground above, so there is nothing to wait
# for right now; kept as a barrier in case runs are backgrounded again.
wait
if ((eval_failures == 0)); then
  echo "✅ 所有训练和评估任务完成。"
else
  echo "❌ ${eval_failures} evaluation run(s) failed." >&2
fi


# ========== Configuration (evaluate_llmtools_qwen_unite.py run) ==========

# ---------- Models ----------
MODEL="Qwen/Qwen2.5-7B"           # given to the evaluator as --model and --base_model
EXPLORE_MODEL_A="Qwen/Qwen2.5-7B" # alternative kept for reference: ,Qwen/Qwen3-4B
ADAPTER="lora"

# ---------- Checkpoint identity (these values form the checkpoint/log paths) ----------
TRAIN_DATASET="math_10k"
METHOD_TYPE="explore"
MODEL_NAME="unite-916-qwen-llmtools-7b-7b"
LR_RATE="2e-4"

# Not referenced anywhere else in the visible part of this script.
LR_RATE_A="2e-4"
CUTOFF_LEN=256

# ---------- Explore / decoding settings ----------
EXPLORE_FLAG=1
EXPLORE_LOGITS_FACTOR=100
DECODING="greedy"
TOP_K="-1" # forwarded as --top_k_explore_logits
gpus="0,0" # forwarded verbatim as --gpus

# ---------- Sweep axes (one evaluation per combination) ----------
TEST_DATASETS=(LLMTOOLs)
NUM_EPOCHS_LIST=(2) # alternative kept for reference: (100)
CLIP_VALUES=(0.1)
TOPK_LIST=(1)       # hyperparameter list (at most 8 entries)
LAMBDAS=(1)         # forwarded one at a time as --explore_weight
ALPHAS=(0.1)        # other values previously listed here: 0.5, 0.2, 0.8, 1.0
BETAS=(0)           # other value previously listed here: 0.2
SEEDS=(1)

# ---------- Logging / environment ----------
WANDB_PROJECT="llmtools"
TOKENIZERS_PARALLELISM=false
export TOKENIZERS_PARALLELISM

# ========== Function: evaluation ==========
#######################################
# Run evaluate_llmtools_qwen_unite.py once per (test dataset, lambda) pair
# for a single trained checkpoint.
# Globals (read):
#   TEST_DATASETS, LAMBDAS, METHOD_TYPE, TRAIN_DATASET, MODEL_NAME, ADAPTER,
#   LR_RATE, MODEL, EXPLORE_MODEL_A, EXPLORE_FLAG, EXPLORE_LOGITS_FACTOR,
#   DECODING, TOP_K, WANDB_PROJECT, ALPHA, BETA, gpus
#   SEED   - used only when $4 is omitted
#   GPU_ID - optional; value for CUDA_VISIBLE_DEVICES (default: 6)
# Arguments:
#   $1 - value passed as --topk_logits
#   $2 - epoch count; part of the checkpoint/log paths, passed as --num_epochs
#   $3 - clip value; part of the checkpoint directory name
#   $4 - seed passed as --seed (optional; falls back to the global SEED)
# Outputs:
#   One progress line per run on stdout. The evaluator's stdout and stderr are
#   shown on stdout and also appended to a per-run log file under logs/.
# Returns:
#   Exit status of the last evaluator invocation (0 if nothing was run).
#######################################
function run_eval {
  local topk_logits=$1
  local epochs=$2
  local clip_value=$3
  local seed=${4:-$SEED}
  # The progress line always reported a GPU id; honour GPU_ID when the caller
  # sets it and keep the previously hard-coded device 6 as the default.
  local gpu_id=${GPU_ID:-6}
  local outdir="./trained_models/$METHOD_TYPE/$TRAIN_DATASET/$epochs/${MODEL_NAME}-${clip_value}-${ADAPTER}-${LR_RATE}/"
  local outdir_copilot="${outdir}Copilot-1/"
  local test_dataset lambda_value log_dir log_file
  local -a eval_args
  local rc=0

  for test_dataset in "${TEST_DATASETS[@]}"; do
    for lambda_value in "${LAMBDAS[@]}"; do
      log_dir="logs/$TRAIN_DATASET/$epochs/$METHOD_TYPE/${test_dataset}/${MODEL_NAME}"
      log_file="$log_dir/${MODEL_NAME}-${ADAPTER}-${test_dataset}-${METHOD_TYPE}-${DECODING}-EXPLORE_FLAG-${EXPLORE_FLAG}-Copilot-lambda${lambda_value}.log"
      mkdir -p "$log_dir"

      echo "🧪 [EVAL] TOPK=$topk_logits, lambda=$lambda_value on GPU $gpu_id -> $log_file"

      # Build the argument list as an array so every value stays one word
      # even if it is empty or contains whitespace.
      eval_args=(
        --model "$MODEL"
        --adapter "$ADAPTER"
        --dataset "$test_dataset"
        --base_model "$MODEL"
        --lora_weights "$outdir"
        --explore_model "$EXPLORE_MODEL_A"
        --base_explore_model "$EXPLORE_MODEL_A"
        --explore_lora_weights "$outdir_copilot"
        --explore_flag "$EXPLORE_FLAG"
        --explore_weight "$lambda_value"
        --decoding_method "$DECODING"
        --explore_logits_factor "$EXPLORE_LOGITS_FACTOR"
        --top_k_explore_logits "$TOP_K"
        --topk_logits "$topk_logits"
        --wandb_run_name "eval-${MODEL_NAME}-${EXPLORE_FLAG}-lambda${lambda_value}-seed-${seed}"
        --wandb_project "$WANDB_PROJECT"
        --num_epochs "$epochs"
        --seed "$seed"
        --alpha "$ALPHA"
        --beta "$BETA"
        --gpus "$gpus"
        --model_type "$MODEL_NAME"
      )

      # tee keeps the output on the terminal while actually writing the log
      # file announced above. PYTHONUNBUFFERED keeps output live through the
      # pipe. Appending (-a) because the log name does not encode top-k,
      # alpha, beta, clip value or seed, so sweep runs would otherwise
      # overwrite each other.
      CUDA_VISIBLE_DEVICES="$gpu_id" PYTHONUNBUFFERED=1 \
        python evaluate_llmtools_qwen_unite.py "${eval_args[@]}" 2>&1 \
        | tee -a "$log_file"
      # Report the evaluator's status, not tee's.
      rc=${PIPESTATUS[0]}
    done
  done

  return "$rc"
}

# ========== Main logic: evaluation sweep ==========
# Calls run_eval once for every combination of epoch count, clip value,
# top-k, alpha, beta and seed. ALPHA and BETA must remain global loop
# variables: run_eval reads them from the environment, not from its arguments.
eval_failures=0
for NUM_EPOCHS in "${NUM_EPOCHS_LIST[@]}"; do
  for CLIP_VALUE in "${CLIP_VALUES[@]}"; do
    for TOPK in "${TOPK_LIST[@]}"; do
      for ALPHA in "${ALPHAS[@]}"; do
        for BETA in "${BETAS[@]}"; do
          for SEED in "${SEEDS[@]}"; do
            # Quoted so an empty or space-containing value cannot shift the
            # positional arguments. A failed run is counted, not fatal, so
            # the remaining combinations still execute.
            run_eval "$TOPK" "$NUM_EPOCHS" "$CLIP_VALUE" "$SEED" \
              || eval_failures=$((eval_failures + 1))
          done
        done
      done
    done
  done
done

# run_eval is invoked in the foreground above, so there is nothing to wait
# for right now; kept as a barrier in case runs are backgrounded again.
wait
if ((eval_failures == 0)); then
  echo "✅ 所有训练和评估任务完成。"
else
  echo "❌ ${eval_failures} evaluation run(s) failed." >&2
fi


# ========== Configuration (evaluate_llmtools_qwen_vote.py run) ==========

# ---------- Models ----------
MODEL="Qwen/Qwen2.5-7B"           # given to the evaluator as --model and --base_model
EXPLORE_MODEL_A="Qwen/Qwen2.5-7B" # alternative kept for reference: ,Qwen/Qwen3-4B
ADAPTER="lora"

# ---------- Checkpoint identity (these values form the checkpoint/log paths) ----------
TRAIN_DATASET="math_10k"
METHOD_TYPE="explore"
MODEL_NAME="vote-916-qwen-llmtools-7b-7b"
LR_RATE="2e-4"

# Not referenced anywhere else in the visible part of this script.
LR_RATE_A="2e-4"
CUTOFF_LEN=256

# ---------- Explore / decoding settings ----------
EXPLORE_FLAG=1
EXPLORE_LOGITS_FACTOR=100
DECODING="greedy"
TOP_K="-1" # forwarded as --top_k_explore_logits
gpus="0,0" # forwarded verbatim as --gpus

# ---------- Sweep axes (one evaluation per combination) ----------
TEST_DATASETS=(LLMTOOLs)
NUM_EPOCHS_LIST=(2) # alternative kept for reference: (100)
CLIP_VALUES=(0.1)
TOPK_LIST=(1)       # hyperparameter list (at most 8 entries)
LAMBDAS=(1)         # forwarded one at a time as --explore_weight
ALPHAS=(0.1)        # other values previously listed here: 0.5, 0.2, 0.8, 1.0
BETAS=(0)           # other value previously listed here: 0.2
SEEDS=(1)

# ---------- Logging / environment ----------
WANDB_PROJECT="llmtools"
TOKENIZERS_PARALLELISM=false
export TOKENIZERS_PARALLELISM

# ========== Function: evaluation ==========
#######################################
# Run evaluate_llmtools_qwen_vote.py once per (test dataset, lambda) pair
# for a single trained checkpoint.
# Globals (read):
#   TEST_DATASETS, LAMBDAS, METHOD_TYPE, TRAIN_DATASET, MODEL_NAME, ADAPTER,
#   LR_RATE, MODEL, EXPLORE_MODEL_A, EXPLORE_FLAG, EXPLORE_LOGITS_FACTOR,
#   DECODING, TOP_K, WANDB_PROJECT, ALPHA, BETA, gpus
#   SEED   - used only when $4 is omitted
#   GPU_ID - optional; value for CUDA_VISIBLE_DEVICES (default: 6)
# Arguments:
#   $1 - value passed as --topk_logits
#   $2 - epoch count; part of the checkpoint/log paths, passed as --num_epochs
#   $3 - clip value; part of the checkpoint directory name
#   $4 - seed passed as --seed (optional; falls back to the global SEED)
# Outputs:
#   One progress line per run on stdout. The evaluator's stdout and stderr are
#   shown on stdout and also appended to a per-run log file under logs/.
# Returns:
#   Exit status of the last evaluator invocation (0 if nothing was run).
#######################################
function run_eval {
  local topk_logits=$1
  local epochs=$2
  local clip_value=$3
  local seed=${4:-$SEED}
  # The progress line always reported a GPU id; honour GPU_ID when the caller
  # sets it and keep the previously hard-coded device 6 as the default.
  local gpu_id=${GPU_ID:-6}
  local outdir="./trained_models/$METHOD_TYPE/$TRAIN_DATASET/$epochs/${MODEL_NAME}-${clip_value}-${ADAPTER}-${LR_RATE}/"
  local outdir_copilot="${outdir}Copilot-1/"
  local test_dataset lambda_value log_dir log_file
  local -a eval_args
  local rc=0

  for test_dataset in "${TEST_DATASETS[@]}"; do
    for lambda_value in "${LAMBDAS[@]}"; do
      log_dir="logs/$TRAIN_DATASET/$epochs/$METHOD_TYPE/${test_dataset}/${MODEL_NAME}"
      log_file="$log_dir/${MODEL_NAME}-${ADAPTER}-${test_dataset}-${METHOD_TYPE}-${DECODING}-EXPLORE_FLAG-${EXPLORE_FLAG}-Copilot-lambda${lambda_value}.log"
      mkdir -p "$log_dir"

      echo "🧪 [EVAL] TOPK=$topk_logits, lambda=$lambda_value on GPU $gpu_id -> $log_file"

      # Build the argument list as an array so every value stays one word
      # even if it is empty or contains whitespace.
      eval_args=(
        --model "$MODEL"
        --adapter "$ADAPTER"
        --dataset "$test_dataset"
        --base_model "$MODEL"
        --lora_weights "$outdir"
        --explore_model "$EXPLORE_MODEL_A"
        --base_explore_model "$EXPLORE_MODEL_A"
        --explore_lora_weights "$outdir_copilot"
        --explore_flag "$EXPLORE_FLAG"
        --explore_weight "$lambda_value"
        --decoding_method "$DECODING"
        --explore_logits_factor "$EXPLORE_LOGITS_FACTOR"
        --top_k_explore_logits "$TOP_K"
        --topk_logits "$topk_logits"
        --wandb_run_name "eval-${MODEL_NAME}-${EXPLORE_FLAG}-lambda${lambda_value}-seed-${seed}"
        --wandb_project "$WANDB_PROJECT"
        --num_epochs "$epochs"
        --seed "$seed"
        --alpha "$ALPHA"
        --beta "$BETA"
        --gpus "$gpus"
        --model_type "$MODEL_NAME"
      )

      # tee keeps the output on the terminal while actually writing the log
      # file announced above. PYTHONUNBUFFERED keeps output live through the
      # pipe. Appending (-a) because the log name does not encode top-k,
      # alpha, beta, clip value or seed, so sweep runs would otherwise
      # overwrite each other.
      CUDA_VISIBLE_DEVICES="$gpu_id" PYTHONUNBUFFERED=1 \
        python evaluate_llmtools_qwen_vote.py "${eval_args[@]}" 2>&1 \
        | tee -a "$log_file"
      # Report the evaluator's status, not tee's.
      rc=${PIPESTATUS[0]}
    done
  done

  return "$rc"
}

# ========== Main logic: evaluation sweep ==========
# Calls run_eval once for every combination of epoch count, clip value,
# top-k, alpha, beta and seed. ALPHA and BETA must remain global loop
# variables: run_eval reads them from the environment, not from its arguments.
eval_failures=0
for NUM_EPOCHS in "${NUM_EPOCHS_LIST[@]}"; do
  for CLIP_VALUE in "${CLIP_VALUES[@]}"; do
    for TOPK in "${TOPK_LIST[@]}"; do
      for ALPHA in "${ALPHAS[@]}"; do
        for BETA in "${BETAS[@]}"; do
          for SEED in "${SEEDS[@]}"; do
            # Quoted so an empty or space-containing value cannot shift the
            # positional arguments. A failed run is counted, not fatal, so
            # the remaining combinations still execute.
            run_eval "$TOPK" "$NUM_EPOCHS" "$CLIP_VALUE" "$SEED" \
              || eval_failures=$((eval_failures + 1))
          done
        done
      done
    done
  done
done

# run_eval is invoked in the foreground above, so there is nothing to wait
# for right now; kept as a barrier in case runs are backgrounded again.
wait
if ((eval_failures == 0)); then
  echo "✅ 所有训练和评估任务完成。"
else
  echo "❌ ${eval_failures} evaluation run(s) failed." >&2
fi
