#!/usr/bin/env bash
# Runs OpenRLHF DPO training (openrlhf.cli.train_dpo via deepspeed) once for
# each SFT checkpoint named in arg_list, sequentially.
#
# For every checkpoint:
#   * stdout/stderr of the run go to log/1B_train_dpo_model_<name>;
#   * if that log file already exists the checkpoint is skipped;
#   * after a successful run the top-level model/tokenizer files found in
#     ckpt/1B/train_dpo_<name>/ are moved into a "last/" subdirectory there.
#
# All relative paths (log/, ckpt/, dpo_data/train) are resolved against the
# directory the script is started from.
set -x

# NOTE(review): presumably required by the training stack's vLLM integration;
# confirm it is still needed. It is loop-invariant, so it is set once here.
export VLLM_ALLOW_INSECURE_SERIALIZATION=1

# Names of the SFT checkpoints to train from.
arg_list=("ckpt0" "ckpt170" "ckpt171" "ckpt172" "ckpt230" "ckpt380" "ckpt433" "ckpt_rlloss" "ckpt_target" "1B_best_ckpt" "1b_poor_ckpt" "1b_best2_ckpt")

# Files that are moved into <save_dir>/last/ after a successful run.
hf_files=(
  added_tokens.json
  config.json
  generation_config.json
  merges.txt
  model.safetensors
  special_tokens_map.json
  tokenizer.json
  tokenizer_config.json
  vocab.json
)

# The log redirection below fails (and training never starts) if log/ is
# missing, so make sure it exists up front.
mkdir -p log || exit 1

for arg1 in "${arg_list[@]}"; do
  ckpt_path="/root/work/filestorage/zhanglianlian/Memory/OpenRLHF-0.8.1.post1/sft_ckpt/$arg1"
  log_file="log/1B_train_dpo_model_$arg1"
  save_dir="ckpt/1B/train_dpo_$arg1"

  # Skip checkpoints whose log file already exists.
  if [[ -f "$log_file" ]]; then
    echo "跳过已完成的任务: $arg1"
    continue
  fi

  if deepspeed --module openrlhf.cli.train_dpo \
    --save_path "$save_dir" \
    --save_steps 10 \
    --logging_steps 1 \
    --save_hf_ckpt \
    --eval_steps 10 \
    --train_batch_size 256 \
    --micro_train_batch_size 1 \
    --pretrain "$ckpt_path" \
    --bf16 \
    --max_epochs 1 \
    --max_len 8192 \
    --zero_stage 3 \
    --learning_rate 5e-7 \
    --beta 0.1 \
    --dataset dpo_data/train \
    --apply_chat_template \
    --prompt_key prompt \
    --chosen_key chosen \
    --rejected_key rejected \
    --flash_attn \
    --disable_ds_ckpt \
    --load_checkpoint \
    --packing_samples \
    --gradient_checkpointing > "$log_file" 2>&1; then
    # Do the move inside a subshell: the script's own working directory is
    # never changed, so a failed cd cannot make mkdir/mv act on the wrong
    # directory or break the relative paths used by later iterations.
    if ! (cd -- "$save_dir" && mkdir -p last && mv -- "${hf_files[@]}" last/); then
      echo "warning: could not move final checkpoint files into $save_dir/last" >&2
    fi
  else
    status=$?
    # The redirection creates the log even when training fails; rename it so
    # the skip check above does not treat this checkpoint as finished on the
    # next invocation.
    mv -- "$log_file" "$log_file.failed"
    echo "error: training failed for $arg1 (exit status $status); see $log_file.failed" >&2
  fi

  # Pause between runs. NOTE(review): presumably gives the previous job time
  # to release its resources before the next launch - confirm.
  sleep 30
done
