#!/bin/bash

# Keys of the models to fine-tune in this run. Every section further down
# checks for its own key in this list; comment a key out to skip that model,
# or uncomment it to include it.
ENABLED=(
	# llama
	llama3
	# gemma
	# gemma2
	qwen
	mistral
)

# Llama 2
# Fine-tunes meta-llama/Llama-2-7b-chat-hf on gsm8k through finetune.py,
# launched by accelerate with 8 processes; runs only when ENABLED lists "llama".
# "${ENABLED[*]}" joins the keys into one string (space-separated under the
# default IFS); the padding spaces make this a whole-key match, so "llama"
# does not also fire on "llama3".
if [[ " ${ENABLED[*]} " == *" llama "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='meta-llama/Llama-2-7b-chat-hf' \
		--dataset_name='gsm8k' \
		--model_family='llama2' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/llama_2_7b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi

# Llama 3.1
# Fine-tunes meta-llama/Llama-3.1-8B-Instruct on gsm8k through finetune.py,
# launched by accelerate with 8 processes; runs only when ENABLED lists
# "llama3". "${ENABLED[*]}" joins the keys into one string (space-separated
# under the default IFS); the padding spaces make this a whole-key match.
if [[ " ${ENABLED[*]} " == *" llama3 "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='meta-llama/Llama-3.1-8B-Instruct' \
		--dataset_name='gsm8k' \
		--model_family='llama3' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/llama_3_8b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi

# Gemma 1.1
# Fine-tunes google/gemma-1.1-7b-it on gsm8k through finetune.py, launched by
# accelerate with 8 processes; runs only when ENABLED lists "gemma".
# "${ENABLED[*]}" joins the keys into one string (space-separated under the
# default IFS); the padding spaces make this a whole-key match, so "gemma"
# does not also fire on "gemma2".
if [[ " ${ENABLED[*]} " == *" gemma "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='google/gemma-1.1-7b-it' \
		--dataset_name='gsm8k' \
		--model_family='gemma' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/gemma_11_7b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi

# Gemma 2
# Fine-tunes google/gemma-2-9b-it on gsm8k through finetune.py, launched by
# accelerate with 8 processes; runs only when ENABLED lists "gemma2".
# "${ENABLED[*]}" joins the keys into one string (space-separated under the
# default IFS); the padding spaces make this a whole-key match.
if [[ " ${ENABLED[*]} " == *" gemma2 "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='google/gemma-2-9b-it' \
		--dataset_name='gsm8k' \
		--model_family='gemma2' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/gemma_2_9b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi

# Qwen 2.5
# Fine-tunes Qwen/Qwen2.5-7B-Instruct on gsm8k through finetune.py, launched by
# accelerate with 8 processes; runs only when ENABLED lists "qwen".
# "${ENABLED[*]}" joins the keys into one string (space-separated under the
# default IFS); the padding spaces make this a whole-key match.
if [[ " ${ENABLED[*]} " == *" qwen "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='Qwen/Qwen2.5-7B-Instruct' \
		--dataset_name='gsm8k' \
		--model_family='qwen2' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/qwen_25_7b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi

# Mistral
# Fine-tunes mistralai/Mistral-7B-Instruct-v0.3 on gsm8k through finetune.py,
# launched by accelerate with 8 processes; runs only when ENABLED lists
# "mistral". "${ENABLED[*]}" joins the keys into one string (space-separated
# under the default IFS); the padding spaces make this a whole-key match.
if [[ " ${ENABLED[*]} " == *" mistral "* ]]; then
	accelerate launch \
		--config_file=accelerate_configs/deepspeed_zero2.yaml \
		--num_processes 8 \
		finetune.py \
		--model_name_or_path='mistralai/Mistral-7B-Instruct-v0.3' \
		--dataset_name='gsm8k' \
		--model_family='mistral' \
		--learning_rate=2e-5 \
		--per_device_train_batch_size=16 \
		--gradient_accumulation_steps=1 \
		--output_dir='outputs/gsm8k/mistral_7b' \
		--logging_steps=1 \
		--num_train_epochs=3 \
		--gradient_checkpointing \
		--report_to=none \
		--torch_dtype=bfloat16 \
		--bf16=True \
		--bf16_full_eval=True \
		--save_strategy='no' \
		--sft_type='sft' \
		--use_warmup=True
fi
