#!/bin/bash

# GPT-2 on SST-2, full fine-tuning.
# Runs ./training/train.py through the DeepSpeed launcher on local GPUs 0-3.
# Flag semantics are defined by the training script; argument order is kept
# exactly as it was on the original command line.
train_args=(
  # Model, sequence lengths, dataset.
  --model_name_or_path ./models/gpt2
  --src_len 512
  --tgt_len 128
  --data_path ./data/sst2
  --num_labels 2
  # Batching.
  --train_micro_batch_size_per_gpu 64
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gpt2/sst2/
  --finetune_method full-tuning
  # NOTE(review): LoRA flags are passed although the mode is full-tuning;
  # presumably train.py ignores them in that mode -- confirm.
  --lora_target_modules c_attn,c_proj,c_fc
  --ds_config_path ./config/ds_config.json
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # Offload options.
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train.py "${train_args[@]}"

# GPT-2 on MNLI (3 labels), full fine-tuning.
# Runs ./training/train.py through the DeepSpeed launcher on local GPUs 0-3.
# Argument order matches the original command line.
train_args=(
  # Model, sequence lengths, dataset.
  --model_name_or_path ./models/gpt2
  --src_len 512
  --tgt_len 128
  --data_path ./data/mnli
  --num_labels 3
  # Batching.
  --train_micro_batch_size_per_gpu 64
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gpt2/mnli/
  --finetune_method full-tuning
  # NOTE(review): LoRA flags are passed although the mode is full-tuning;
  # presumably train.py ignores them in that mode -- confirm.
  --lora_target_modules c_attn,c_proj,c_fc
  --ds_config_path ./config/ds_config.json
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # Offload options.
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train.py "${train_args[@]}"

# GPT-2 on QNLI (2 labels), full fine-tuning.
# Runs ./training/train.py through the DeepSpeed launcher on local GPUs 0-3.
# Argument order matches the original command line.
train_args=(
  # Model, sequence lengths, dataset.
  --model_name_or_path ./models/gpt2
  --src_len 512
  --tgt_len 128
  --data_path ./data/qnli
  --num_labels 2
  # Batching.
  --train_micro_batch_size_per_gpu 64
  --gradient_accumulation_steps 2
  # Learning-rate bounds (initial_lr is 2e-5 for this task), optimizer
  # settings, epoch count.
  --max_lr 1e-4
  --initial_lr 2e-5
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gpt2/qnli/
  --finetune_method full-tuning
  # NOTE(review): LoRA flags are passed although the mode is full-tuning;
  # presumably train.py ignores them in that mode -- confirm.
  --lora_target_modules c_attn,c_proj,c_fc
  --ds_config_path ./config/ds_config.json
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # Offload options.
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train.py "${train_args[@]}"

# GPT-2 on QQP (2 labels), full fine-tuning.
# Runs ./training/train.py through the DeepSpeed launcher on local GPUs 0-3.
# Argument order matches the original command line.
train_args=(
  # Model, sequence lengths, dataset.
  --model_name_or_path ./models/gpt2
  --src_len 512
  --tgt_len 128
  --data_path ./data/qqp
  --num_labels 2
  # Batching.
  --train_micro_batch_size_per_gpu 64
  --gradient_accumulation_steps 2
  # Learning-rate bounds (initial_lr is 1e-5 for this task), optimizer
  # settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-5
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gpt2/qqp/
  --finetune_method full-tuning
  # NOTE(review): LoRA flags are passed although the mode is full-tuning;
  # presumably train.py ignores them in that mode -- confirm.
  --lora_target_modules c_attn,c_proj,c_fc
  --ds_config_path ./config/ds_config.json
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # Offload options.
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train.py "${train_args[@]}"

# Qwen3-4B on GoEmotions, LoRA fine-tuning.
# Runs ./training/train_goemotions.py through the DeepSpeed launcher on
# local GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, source length.
  --model_name_or_path ./models/qwen3-4b
  --data_path ./data/goemotions
  --src_len 64
  # Batching.
  --train_micro_batch_size_per_gpu 8
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 1
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/qwen3/goemotions/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_goemotions.py "${train_args[@]}"

# Gemma3-1B-it on GoEmotions, LoRA fine-tuning.
# Runs ./training/train_goemotions.py through the DeepSpeed launcher on
# local GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, source length.
  --model_name_or_path ./models/gemma3-1b-it
  --data_path ./data/goemotions
  --src_len 64
  # Batching.
  --train_micro_batch_size_per_gpu 16
  --gradient_accumulation_steps 1
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 1
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/gemma3/goemotions/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_goemotions.py "${train_args[@]}"

# Llama-3.2-3B-Instruct on GoEmotions, LoRA fine-tuning.
# Runs ./training/train_goemotions.py through the DeepSpeed launcher on
# local GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, sequence lengths.
  --model_name_or_path ./models/Llama-3.2-3B-Instruct
  --data_path ./data/goemotions
  --src_len 64
  # NOTE(review): this is the only goemotions run in this script that passes
  # --tgt_len; confirm whether train_goemotions.py actually uses it.
  --tgt_len 128
  # Batching.
  --train_micro_batch_size_per_gpu 8
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 1
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/llama3.2/goemotions/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_goemotions.py "${train_args[@]}"
# Qwen3-4B on WiC (2 labels), LoRA fine-tuning.
# Runs ./training/train_wic.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, label count, sequence lengths.
  --model_name_or_path ./models/qwen3-4b
  --data_path ./data/wic
  --num_labels 2
  --src_len 512
  --tgt_len 128
  # Batching.
  --train_micro_batch_size_per_gpu 4
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 5e-5
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 4
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/qwen3/wic/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_wic.py "${train_args[@]}"

# Llama-3.2-3B-Instruct on WiC (2 labels), LoRA fine-tuning.
# Runs ./training/train_wic.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, label count, sequence lengths.
  --model_name_or_path ./models/Llama-3.2-3B-Instruct
  --data_path ./data/wic
  --num_labels 2
  --src_len 512
  --tgt_len 128
  # Batching.
  --train_micro_batch_size_per_gpu 4
  --gradient_accumulation_steps 2
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 5e-5
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 4
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/llama3.2/wic/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_wic.py "${train_args[@]}"

# Gemma3-1B-it on WiC (2 labels), LoRA fine-tuning.
# Runs ./training/train_wic.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, label count, sequence lengths.
  --model_name_or_path ./models/gemma3-1b-it
  --data_path ./data/wic
  --num_labels 2
  --src_len 512
  --tgt_len 128
  # Batching.
  --train_micro_batch_size_per_gpu 8
  --gradient_accumulation_steps 1
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 5e-5
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 4
  # Output location and LoRA configuration.
  --output_dir ./outputs/victim/gemma3/wic/
  --finetune_method lora
  --lora_target_modules q_proj,k_proj,v_proj,o_proj
  --lora_alpha 32
  --lora_dropout 0.05
  --lora_r 16
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_wic.py "${train_args[@]}"

# Qwen3-4B on FinQA, full fine-tuning.
# Runs ./training/train_finqa.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, sequence lengths.
  --model_name_or_path ./models/qwen3-4b
  --data_path ./data/finqa/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 1 with 16 accumulation steps.
  --train_micro_batch_size_per_gpu 1
  --gradient_accumulation_steps 16
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/qwen3/finqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_finqa.py "${train_args[@]}"

# Gemma3-1B-it on FinQA, full fine-tuning.
# Runs ./training/train_finqa.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, sequence lengths.
  --model_name_or_path ./models/gemma3-1b-it
  --data_path ./data/finqa/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 2 with 8 accumulation steps.
  --train_micro_batch_size_per_gpu 2
  --gradient_accumulation_steps 8
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gemma3/finqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_finqa.py "${train_args[@]}"

# Llama-3.2-3B-Instruct on FinQA, full fine-tuning.
# Runs ./training/train_finqa.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, sequence lengths.
  --model_name_or_path ./models/Llama-3.2-3B-Instruct
  --data_path ./data/finqa/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 1 with 16 accumulation steps.
  --train_micro_batch_size_per_gpu 1
  --gradient_accumulation_steps 16
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 3
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/llama3.2/finqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_finqa.py "${train_args[@]}"

# python ./training/utils/inference_finqa.py \
#     --model_path ./outputs/victim/gemma3/finqa/checkpoint-360 \
#     --test_json ./data/finqa/test.json \
#     --out_json ./training/predictions/gemma3.json


# python ./training/utils/inference_finqa.py \
#     --model_path ./outputs/tsqp/qwen3/finqa/final_model \
#     --test_json ./data/finqa/test.json \
#     --out_json ./training/predictions/qwen3.json

# Qwen3-4B on PubMedQA, full fine-tuning.
# Runs ./training/train_pubmedqa.py through the DeepSpeed launcher on local
# GPUs 0-3. Argument order matches the original command line.
train_args=(
  # Model, dataset, sequence lengths.
  --model_name_or_path ./models/qwen3-4b
  --data_path ./data/pubmedqa/split/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 1 with 16 accumulation steps.
  --train_micro_batch_size_per_gpu 1
  --gradient_accumulation_steps 16
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 5
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/qwen3/pubmedqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_pubmedqa.py "${train_args[@]}"

# Llama-3.2-3B-Instruct on PubMedQA, full fine-tuning.
# Runs ./training/train_pubmedqa.py through the DeepSpeed launcher on local
# GPUs 0-3.
train_args=(
  # Model path is written with an explicit ./ prefix, like every other run in
  # this script, so it is unambiguously a local directory rather than a
  # string shaped like a hub repository id.
  --model_name_or_path ./models/Llama-3.2-3B-Instruct
  --data_path ./data/pubmedqa/split/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 1 with 16 accumulation steps.
  --train_micro_batch_size_per_gpu 1
  --gradient_accumulation_steps 16
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 5
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/llama3.2/pubmedqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_pubmedqa.py "${train_args[@]}"

# Gemma3-1B-it on PubMedQA, full fine-tuning (10 epochs; the other PubMedQA
# runs in this script use 5).
# Runs ./training/train_pubmedqa.py through the DeepSpeed launcher on local
# GPUs 0-3.
train_args=(
  # Model path is written with an explicit ./ prefix, like every other run in
  # this script, so it is unambiguously a local directory rather than a
  # string shaped like a hub repository id.
  --model_name_or_path ./models/gemma3-1b-it
  --data_path ./data/pubmedqa/split/
  --src_len 2048
  --tgt_len 128
  # Batching: micro-batch of 1 with 16 accumulation steps.
  --train_micro_batch_size_per_gpu 1
  --gradient_accumulation_steps 16
  # Learning-rate bounds, optimizer settings, epoch count.
  --max_lr 1e-4
  --initial_lr 1e-6
  --min_lr 1e-8
  --weight_decay 0.01
  --adam_beta1 0.9
  --adam_beta2 0.999
  --epochs 10
  # Output location and fine-tuning mode.
  --output_dir ./outputs/victim/gemma3/pubmedqa/
  --finetune_method full-tuning
  # DeepSpeed config file and offload options.
  --ds_config_path ./config/ds_config.json
  --offload_device cpu
  --nvme_path ./mnt/nvme
)
deepspeed --include localhost:0,1,2,3 --master_port 12345 \
  ./training/train_pubmedqa.py "${train_args[@]}"

          





