
#!/bin/bash
# NOTE(review): a shebang is only honoured on the very first line of a file;
# confirm that no blank line precedes it.
#
# Launch train.py through deepspeed, using ${model_dir}/llama-7b-hf as the
# base model.
#
# Usage:
#   <script> TASK DATA_PATH SEED EPOCH ALIGNMENT_TYPE NEG_DETACH BOUNDARY BETA RFT
#
# Every positional argument is forwarded to train.py and is also embedded in
# the checkpoint directory name.

# Directory that contains the llama-7b-hf folder; replace the placeholder.
model_dir=your_model_dir
# Put wandb into offline mode before training starts.
wandb offline

gradient_accumulation_steps=2
per_device_train_batch_size=1
# Forwarded to train.py as --save; flipped to 1 below for seed 1 only.
save=0

task=$1 # gsm8k, aqua, ecqa
data_path=$2
seed=$3
epoch=$4
alignment_type=$5
neg_detach=$6
boundary=$7
beta=$8
rft=$9

# Batch-size figure used only in the checkpoint directory name.
# NOTE(review): the factor 8 is presumably the GPU count, while the launch
# itself uses $HOST_GPU_NUM -- confirm the two agree.
bsz=$(( gradient_accumulation_steps * per_device_train_batch_size * 8 ))

if [[ "$seed" == "1" ]]; then
  save=1
fi


# Print the --model_max_length value used for a task name.
# Arguments: $1 - task name
# Outputs:   512 for gsm8k and aqua, 350 for ecqa (on stdout)
# Returns:   0 for a known task, 1 (printing nothing) otherwise
max_length_for_task() {
  case "$1" in
    gsm8k | aqua) echo 512 ;;
    ecqa) echo 350 ;;
    *) return 1 ;;
  esac
}

# The task name is quoted so an empty or multi-word value cannot break the
# comparison. An unrecognised task is reported instead of being skipped
# silently; the script still continues with an empty model_max_length.
if ! model_max_length=$(max_length_for_task "$task"); then
  echo "warning: unknown task '$task'; model_max_length is empty" >&2
fi



# Checkpoint directory; its name encodes the task, data path, epoch count,
# alignment type, batch size, neg_detach, boundary, beta, rft and seed.
output_dir=ckpt/llama-7b-${task}_${data_path}-${epoch}-type${alignment_type}-bsz${bsz}-neg_detach${neg_detach}_boundary${boundary}_beta${beta}_RFT${rft}_seed${seed}-hf

# Put the current directory on PYTHONPATH. The variable name used to be
# misspelled, so the export never had any effect. Entries already present
# are kept after the current directory.
PYTHONPATH="$(pwd)${PYTHONPATH:+:${PYTHONPATH}}"
export PYTHONPATH

# Run train.py under deepspeed with configs/deepspeed_zero3.json.
# HOST_GPU_NUM is not assigned anywhere in this script, so it has to be
# provided by the environment. Evaluation and checkpoint saving by strategy
# are both switched off ("no"); --save carries the seed-dependent flag.
deepspeed --num_gpus $HOST_GPU_NUM \
    train.py \
    --deepspeed configs/deepspeed_zero3.json \
    --model_name_or_path ${model_dir}/llama-7b-hf \
    --data_path $data_path \
    --fp16 True \
    --num_train_epochs ${epoch} \
    --output_dir $output_dir \
    --per_device_train_batch_size $per_device_train_batch_size \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps $gradient_accumulation_steps \
    --evaluation_strategy "no" \
    --metric_for_best_model 'accuracy' \
    --greater_is_better True \
    --save_strategy "no" \
    --save_total_limit 2 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "linear" \
    --logging_steps 1 \
    --model_max_length $model_max_length \
    --report_to "wandb" \
    --dataloader_num_workers 4 \
    --gradient_checkpointing True \
    --seed $seed \
    --predict_with_generate \
    --alignment_type $alignment_type \
    --task $task \
    --neg_detach $neg_detach \
    --rft $rft \
    --save $save \
    --inference 1 \
    --beta $beta \
    --boundary $boundary

