#!/bin/bash

# Source the activate script from the Anaconda installation, then switch to
# the conda environment used for this training run.
source /data/home/the/anaconda3/bin/activate
conda activate openr1 # Assumes the openr1 environment is used; change this if a different environment is needed.

# Remove all proxy-related environment variables (lowercase and uppercase
# spellings) from this shell so that child processes do not inherit them.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY

# Positional argument 20 (default: 0) is a 0/1 switch. It is translated into
# the literal strings "true"/"false", which are later passed to
# --skip_empty_process_case. Any value other than exactly "1" means "false".
skip_empty_process_case_flag=${20:-0}
# Quoted comparison inside [[ ]]: the value is never word-split or globbed,
# so odd inputs (spaces, '*') cannot cause test errors or accidental matches.
if [[ "$skip_empty_process_case_flag" == 1 ]]; then
    skip_empty_process_case=true
else
    skip_empty_process_case=false
fi

# Run src/open_r1/grpo.py via `accelerate launch` with 8 processes, exposing
# GPUs 0-7 and using recipes/accelerate_configs/zero2.yaml as the accelerate
# config. Every option below is taken from a positional parameter of this
# script; when a parameter is unset or empty, the inline default is used.
#
#   $1  --raw_dataset_name          $13 --theta
#   $2  --dataset_name              $14 --delta
#   $3  --output_dir                $15 --tgt_ent
#   $4  --apply_process_reward      $16 --num_generations
#   $5  --reward_exp                $17 --per_device_train_batch_size
#   $6  --reward_coe                $18 --temperature
#   $7  --run_name                  $19 --wandb_run_group
#   $8  --update_ref_model          $20 (0/1 switch, already converted to
#   $9  --ref_model_name_or_path         $skip_empty_process_case = true/false)
#   $10 --beta                      $21 --lr_scheduler_type
#   $11 --num_train_epochs          $22 --epsilon_high
#   $12 --apply_entropy_loss        $23 --min_theta
#                                   $24 --max_theta
#
# All expansions are double-quoted so that each value reaches the Python
# script as exactly one argument, regardless of spaces or glob characters.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 ACCELERATE_LOG_LEVEL=info \
    accelerate launch --config_file recipes/accelerate_configs/zero2.yaml --num_processes 8 \
    src/open_r1/grpo.py --config recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/config_grpo.yaml \
    --raw_dataset_name "${1:-data/DeepSeek-R1-Distill-Qwen-1.5B-50-3/collect_solutions/train-0-50-steps-data.json}" \
    --dataset_name "${2:-data/DeepSeek-R1-Distill-Qwen-1.5B-50-3/train-0-50-steps-data.json}" \
    --output_dir "${3:-data/DeepSeek-R1-Distill-Qwen-1.5B-50-3/saved_checkpoints}" \
    --apply_process_reward "${4:-1}" --reward_exp "${5:-1.0}" --reward_coe "${6:-1.0}" \
    --run_name "${7:-DeepSeek-R1-Distill-Qwen-1.5B-BNPO}" \
    --update_ref_model "${8:-1}" --ref_model_name_or_path "${9:-/data/home/the/models/DeepSeek-R1-Distill-Qwen-1.5B}" \
    --beta "${10:-0.04}" --num_train_epochs "${11:-20}" \
    --apply_entropy_loss "${12:-0}" --theta "${13:-0.005}" --delta "${14:-0.005}" --tgt_ent "${15:-0.2}" \
    --num_generations "${16:-8}" --per_device_train_batch_size "${17:-6}" --temperature "${18:-0.6}" \
    --wandb_run_group "${19:-normal}" --skip_empty_process_case "$skip_empty_process_case" \
    --lr_scheduler_type "${21:-cosine_with_min_lr}" \
    --epsilon_high "${22:-0.2}" --min_theta "${23:-0.0}" --max_theta "${24:-0.005}"