# LIMA fine-tuning of Llama-2-7b-chat

# Fine-tune ckpts/Llama-2-7b-chat-hf on lima_dataset with FSDP enabled, using
# torchrun on a single node with two worker processes. All training output
# (stdout and stderr) is captured in lima-ft.log.
# NOTE(review): CUDA_VISIBLE_DEVICES exposes three GPUs (0,2,3) but only two
# worker processes are started -- confirm which is intended.
CUDA_VISIBLE_DEVICES=0,2,3 nohup torchrun --nnodes 1 --nproc_per_node 2  finetuning.py \
--batch_size_training 32 --lr 1e-5 \
--num_epochs 15 \
--dataset lima_dataset \
--enable_fsdp \
--model_name ckpts/Llama-2-7b-chat-hf --pure_bf16 \
--output_dir finetuned_models/lima-7b-full \
--gradient_accumulation_steps 1 --run_validation False --save_every_epoch False > lima-ft.log 2>&1 &
lima_train_pid=$!

# The training job runs in the background, so block until it exits before
# touching its checkpoint. Without this the converter below (and every later
# training run in this file, which uses the same GPUs) would start while this
# job is still training.
lima_train_status=0
wait "$lima_train_pid" || lima_train_status=$?

if [ "$lima_train_status" -eq 0 ]; then
  # Run the FSDP -> HF checkpoint converter; the consolidated model is written
  # to finetuned_models/lima-7b-full/.
  python inference/checkpoint_converter_fsdp_hf.py \
    -fsdp_checkpoint_path "fsdp/lima_ft_ckpt/Llama-2-7b-chat-hf" \
    -consolidated_model_path "finetuned_models/lima-7b-full/" \
    -HF_model_path_or_name "ckpts/Llama-2-7b-chat-hf"
else
  # Never convert a checkpoint from a run that did not finish cleanly.
  printf '%s\n' "LIMA fine-tuning exited with status ${lima_train_status}; skipping checkpoint conversion (see lima-ft.log)" >&2
fi


# -----------------------------------------------------------------------

# LIMA fine-tuning -> AOA fine-tuning

# Second stage of the LIMA -> AOA pipeline: fine-tune on aoa_dataset starting
# from finetuned_models/lima-7b-full, which is the consolidated output path of
# the LIMA checkpoint conversion earlier in this file. All training output
# (stdout and stderr) is captured in aoa-lima-ft.log.
# NOTE(review): CUDA_VISIBLE_DEVICES exposes three GPUs (0,2,3) but only two
# worker processes are started -- confirm which is intended.
CUDA_VISIBLE_DEVICES=0,2,3 nohup torchrun --nnodes 1 --nproc_per_node 2  finetuning.py \
--batch_size_training 5 --lr 5e-5 \
--num_epochs 10 \
--dataset aoa_dataset \
--enable_fsdp \
--model_name finetuned_models/lima-7b-full --pure_bf16 \
--output_dir finetuned_models/aoa-lima-7b-full \
--gradient_accumulation_steps 1 --run_validation False --save_every_epoch False > aoa-lima-ft.log 2>&1 &
aoa_lima_train_pid=$!

# The training job runs in the background, so block until it exits before
# touching its checkpoint; otherwise the converter would run against a
# checkpoint that is still being produced.
aoa_lima_train_status=0
wait "$aoa_lima_train_pid" || aoa_lima_train_status=$?

if [ "$aoa_lima_train_status" -eq 0 ]; then
  # Run the FSDP -> HF checkpoint converter; the consolidated model is written
  # to finetuned_models/aoa-lima-7b-full/.
  python inference/checkpoint_converter_fsdp_hf.py \
    -fsdp_checkpoint_path "fsdp/aoa_lima_ft_ckpt/lima-7b-full" \
    -consolidated_model_path "finetuned_models/aoa-lima-7b-full/" \
    -HF_model_path_or_name "finetuned_models/lima-7b-full/"
else
  # Never convert a checkpoint from a run that did not finish cleanly.
  printf '%s\n' "AOA-on-LIMA fine-tuning exited with status ${aoa_lima_train_status}; skipping checkpoint conversion (see aoa-lima-ft.log)" >&2
fi

# -----------------------------------------------------------------------

# AOA fine-tuning directly from the base Llama-2-7b-chat model (no LIMA stage)

# Fine-tune ckpts/Llama-2-7b-chat-hf on aoa_dataset with FSDP enabled, using
# torchrun on a single node with two worker processes. All training output
# (stdout and stderr) is captured in aoa-ft.log.
# NOTE(review): CUDA_VISIBLE_DEVICES exposes three GPUs (0,2,3) but only two
# worker processes are started -- confirm which is intended.
CUDA_VISIBLE_DEVICES=0,2,3 nohup torchrun --nnodes 1 --nproc_per_node 2  finetuning.py \
--batch_size_training 5 --lr 5e-5 \
--num_epochs 10 \
--dataset aoa_dataset \
--enable_fsdp \
--model_name ckpts/Llama-2-7b-chat-hf --pure_bf16 \
--output_dir finetuned_models/aoa-7b-full \
--gradient_accumulation_steps 1 --run_validation False --save_every_epoch False > aoa-ft.log 2>&1 &
aoa_train_pid=$!

# The training job runs in the background, so block until it exits before
# touching its checkpoint; otherwise the converter would run against a
# checkpoint that is still being produced.
aoa_train_status=0
wait "$aoa_train_pid" || aoa_train_status=$?

if [ "$aoa_train_status" -eq 0 ]; then
  # Run the FSDP -> HF checkpoint converter; the consolidated model is written
  # to finetuned_models/aoa-7b-full/.
  python inference/checkpoint_converter_fsdp_hf.py \
    -fsdp_checkpoint_path "fsdp/aoa_ft_ckpt/Llama-2-7b-chat-hf" \
    -consolidated_model_path "finetuned_models/aoa-7b-full/" \
    -HF_model_path_or_name "ckpts/Llama-2-7b-chat-hf"
else
  # Never convert a checkpoint from a run that did not finish cleanly.
  printf '%s\n' "AOA fine-tuning exited with status ${aoa_train_status}; skipping checkpoint conversion (see aoa-ft.log)" >&2
fi

# -----------------------------------------------------------------------

# evaluate models on AdvBench (harmful dataset)

# Launch inference over safety_evaluation/data/harmful_part2.csv for the four
# models, one background job per model, each pinned to a single GPU through
# CUDA_VISIBLE_DEVICES. Every job uses the "aoa" prompt template.

# The shell opens a redirect target before it starts the command, so a missing
# logs/harmful directory would stop the job from launching at all. The output
# directory is created as well, in case question_inference.py does not create
# it itself.
mkdir -p logs/harmful safety_evaluation/question_output/harmful

# run_harmful_inference GPU MODEL OUTPUT_STEM LOG_STEM
#   GPU          value exported as CUDA_VISIBLE_DEVICES for this job
#   MODEL        path passed to --model_name
#   OUTPUT_STEM  generations go to safety_evaluation/question_output/harmful/OUTPUT_STEM.jsonl
#   LOG_STEM     stdout+stderr go to logs/harmful/LOG_STEM.log
# Starts the job under nohup in the background and returns immediately.
run_harmful_inference() {
  CUDA_VISIBLE_DEVICES="$1" nohup python -u safety_evaluation/question_inference.py \
    --model_name "$2" \
    --prompt_file safety_evaluation/data/harmful_part2.csv \
    --prompt_template_style aoa \
    --output_file "safety_evaluation/question_output/harmful/$3.jsonl" > "logs/harmful/$4.log" 2>&1 &
}

# NOTE(review): jobs 1 and 3 both target GPU 0 and run concurrently -- confirm
# the card has room for two models at once.

# 1. original llama model
run_harmful_inference 0 ckpts/Llama-2-7b-chat-hf bn_7b_part2 bn2

# 2. lima fine-tuned
run_harmful_inference 2 finetuned_models/lima-7b-full/ lima_ft_7b_part2 lima2

# 3. aoa fine-tuned
run_harmful_inference 0 finetuned_models/aoa-7b-full/ aoa_ft_7b_part2 aoa2

# 4. aoa-lima fine-tuned
run_harmful_inference 3 finetuned_models/aoa-lima-7b-full/ aoa_lima_ft_7b_part2 aoa-lima2


# --------------------------------------------------------------

# evaluate performance

# Run the keyword evaluator once per model, in order: original, LIMA
# fine-tuned, AOA fine-tuned, AOA-on-LIMA fine-tuned.
# NOTE(review): these inputs are named <tag>_7b.jsonl, while the inference
# commands in this file write <tag>_7b_part2.jsonl -- confirm these are the
# intended (e.g. merged) files.
for run_tag in bn lima_ft aoa_ft aoa_lima_ft; do
  python safety_evaluation/keyword_eval.py \
    --input_file "safety_evaluation/question_output/harmful/${run_tag}_7b.jsonl"
done