#!/bin/bash
#
# Launches LLaMA-Factory's train_bash.py through the DeepSpeed launcher
# (1 node, 8 GPUs) for stage "sft" with LoRA on q_proj,v_proj over the
# "training_data_31k" dataset, reporting to Weights & Biases.
#
# Required environment variables:
#   MODEL_PATH  value passed to --model_name_or_path
#   OUTPUT_DIR  value passed to --output_dir (--overwrite_output_dir is True)
#   CACHE_PATH  value passed to --cache_path
#
# Example:
#   MODEL_PATH=/models/base OUTPUT_DIR=/runs/sft CACHE_PATH=/cache/sft ./this_script.sh
#
# The training script, DeepSpeed config and dataset directory are given as
# relative paths, so run this from the directory they are relative to.
#
# Exit status: the status of the deepspeed launcher, or non-zero if the
# environment could not be prepared.

# NOTE: `set -e` / `set -u` are intentionally not enabled. ~/.bashrc and the
# conda activation hooks are executed inside this shell and may reference
# unset variables or run commands that return non-zero; failures that matter
# are checked explicitly below instead.
source ~/.bashrc

# Refuse to train in whatever environment happens to be active.
if ! conda activate agent_tuning; then
  echo "error: could not activate conda environment 'agent_tuning'" >&2
  exit 1
fi

# These were literal <...> placeholders, which bash parses as redirections.
# Abort with a clear message when one of them is missing or empty.
: "${MODEL_PATH:?set MODEL_PATH to the model name or path}"
: "${OUTPUT_DIR:?set OUTPUT_DIR to the training output directory}"
: "${CACHE_PATH:?set CACHE_PATH to the dataset cache path}"

train_args=(
  # --- DeepSpeed launcher options ---
  --master_port=9985
  --num_gpus 8
  --num_nodes 1

  # --- Training entry point and its DeepSpeed config ---
  ../LLaMA-Factory/src/train_bash.py
  --deepspeed ds_config_3_xofl.json

  # --- Task, model and data ---
  --stage sft
  --model_name_or_path "${MODEL_PATH}"
  --do_train
  --dataset training_data_31k
  --train_size 31255
  --shuffle False
  --dataset_dir ../data/code/
  --template llama2
  --cutoff_len 4096
  --cache_path "${CACHE_PATH}"
  --preprocessing_num_workers 16

  # --- LoRA ---
  --finetuning_type lora
  --lora_target q_proj,v_proj

  # --- Output and checkpointing ---
  --output_dir "${OUTPUT_DIR}"
  --overwrite_output_dir True
  --save_strategy epoch
  --save_total_limit 1
  --save_only_model

  # --- Batching ---
  # 2 per device x 8 GPUs x 4 accumulation steps = 64 samples per optimizer
  # step, assuming plain data parallelism (TODO confirm).
  --per_device_train_batch_size 2
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 4
  --gradient_checkpointing True

  # --- Optimisation schedule ---
  --learning_rate 1e-5
  --lr_scheduler_type cosine
  --weight_decay 0.
  --warmup_ratio 0.03
  --num_train_epochs 8

  # --- Precision / kernels ---
  --bf16 True
  --flash_attn True

  # --- Evaluation, logging and reporting ---
  --evaluation_strategy steps
  --logging_steps 30
  --plot_loss
  --report_to wandb

  # --- Reproducibility ---
  --seed 87
)

# The W&B variables are set as command-prefix assignments, so they apply to
# the deepspeed process only and do not leak into the rest of this shell.
WANDB__SERVICE_WAIT=500 \
WANDB_PROJECT=agent \
WANDB_ENTITY=vl001 \
WANDB_NAME=training_data_31k \
deepspeed "${train_args[@]}"
status=$?

# Report the launcher's status and propagate it; without the explicit exit
# the script would end with echo's status (always 0) and hide failures.
echo "exit code: ${status}"
exit "${status}"
