#!/bin/bash

#SBATCH --job-name=cllama_full
#SBATCH --output=cllama_full.out
#SBATCH --error=cllama_full.err
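# Disabled directives for the "scrp" (scrape) variant of this job. The space
# between "#" and "SBATCH" makes Slurm treat these lines as ordinary comments.
# SBATCH --job-name=cllama_full_scrp
# SBATCH --output=cllama_full_scrp.out
# SBATCH --error=cllama_full_scrp.err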

#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:A100:3
#SBATCH --time=23:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bo@andrew.cmu.edu

# ---------------------------------------------------------------------------
# Job body: activate the "agent" conda environment, then run full-parameter
# SFT of CodeLlama-7b on the m2w_code dataset through LLaMA-Factory's
# train_bash.py, started with `accelerate launch`.
# The script's exit status is the exit status of the training launch.
# ---------------------------------------------------------------------------

# Presumably ~/.bashrc is what makes the `conda` command available in this
# batch shell. Its exit status is intentionally not checked: an rc file can
# end on a harmless non-zero command without anything being wrong.
source ~/.bashrc

# Stop here if the environment cannot be activated. Without this check the
# launch below would still run in whatever environment happens to be active,
# wasting the GPU allocation on a job that fails late or uses wrong packages.
if ! conda activate agent; then
  echo "error: could not activate conda environment 'agent'; aborting job" >&2
  exit 1
fi

# Weights & Biases settings, read from the environment by the training
# process (metrics are reported to wandb via --report_to below).
export WANDB__SERVICE_WAIT=300
export WANDB_PROJECT=llama

# Options consumed by `accelerate launch` itself.
launch_args=(
  --main_process_port 20503
  --num_processes 3 # matches the 3 GPUs requested with --gres=gpu:A100:3
  --num_machines 1
)

train_script=/data/b_ou/agent-model/LLaMA-Factory/src/train_bash.py

# Options forwarded to train_bash.py.
train_args=(
  # --- task, model and data ---
  --stage sft
  --model_name_or_path /data/b_ou/ckpts/cllama/models--codellama--CodeLlama-7b-hf/snapshots/bc5283229e2fe411552f55c71657e97edf79066c/
  --cache_path /data/b_ou/ckpts/data_cache/cllama
  --do_train
  --do_eval
  --dataset m2w_code
  --train_size 16500
  --shuffle False
  --dataset_dir /data/b_ou/agent/data/code/
  --template llama2
  --finetuning_type full

  # --- outputs (existing output dir and data cache are overwritten) ---
  --output_dir /data/b_ou/ckpts/output_16k_cllama_full/
  --overwrite_output_dir True
  --overwrite_cache

  # --- batching and optimisation ---
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 16
  --gradient_checkpointing True
  --lr_scheduler_type cosine
  --learning_rate 5e-5
  --num_train_epochs 5
  --bf16 True
  --cutoff_len 4096

  # --- evaluation, checkpointing and logging ---
  # NOTE(review): evaluation is step-based but no --eval_steps is passed, so
  # the interval is whatever the trainer defaults to -- confirm it is intended.
  --evaluation_strategy steps
  --save_strategy epoch
  --logging_steps 100
  --save_total_limit 1
  --plot_loss
  --report_to wandb

  # --- FSDP sharding across the launched processes ---
  # The --fsdp value must stay a single argument (it contains a space).
  --fsdp "full_shard auto_wrap"
  --fsdp_transformer_layer_cls_to_wrap LlamaDecoderLayer
)

accelerate launch "${launch_args[@]}" "$train_script" "${train_args[@]}"

# Disabled launch flag, kept for reference (no --eval_steps is currently passed):
#     --eval_steps 1 \




# WANDB__SERVICE_WAIT=300 WANDB_PROJECT=llama accelerate launch  --main_process_port 20502 --num_processes 3 --num_machines 1  /data/b_ou/LLaMA-Factory/src/train_bash.py \
#     --stage sft \
#     --model_name_or_path /data/b_ou/ckpts/cllama/models--codellama--CodeLlama-7b-hf/snapshots/bc5283229e2fe411552f55c71657e97edf79066c/ \
#     --cache_path /data/b_ou/ckpts/data_cache/cllama_scrp \
#     --do_train \
#     --do_eval \
#     --dataset m2w_code_scrape \
#     --train_size 7500 \
#     --shuffle False \
#     --dataset_dir /data/b_ou/agent/data/code_scrp/ \
#     --template llama2 \
#     --finetuning_type full \
#     --output_dir /data/b_ou/ckpts/output_8k_cllama_full_scrp/ \
#     --overwrite_output_dir True \
#     --overwrite_cache \
#     --per_device_train_batch_size 1 \
#     --per_device_eval_batch_size 1 \
#     --gradient_accumulation_steps 16 \
#     --gradient_checkpointing True \
#     --lr_scheduler_type cosine \
#     --evaluation_strategy "steps" \
#     --save_strategy "epoch" \
#     --logging_steps "100" \
#     --save_total_limit 1 \
#     --learning_rate 5e-5 \
#     --num_train_epochs 5 \
#     --plot_loss \
#     --bf16 True \
#     --fsdp "full_shard auto_wrap" \
#     --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
#     --report_to 'wandb'
