#!/bin/bash

# Global defaults. Some presets in configure_model override max_len and
# sample_size.
max_len=4096
sample_size=4

# Positional arguments (all optional; an unset or empty value selects the
# default):
#   $1 model    preset name understood by configure_model
#   $2 task     forwarded as --use_task
#   $3 dataset  forwarded as --use_dataset and used in the output directory
#   $4 bs       per-device train batch size
#   $5 emb      pretrained embedding type; also part of the checkpoint prefix
#   $6 seed     forwarded as --use_seed and used in the output directory
model=${1:-"vicuna"}
task=${2:-"nc"}
dataset=${3:-"arxiv-products"}
bs=${4:-16}
emb=${5:-"simteg"}
seed=${6:-42}

learning_rate=2e-3

#######################################
# Select the training settings for a model preset.
# Globals:
#   emb, sample_size (read when building the prefix)
#   use_hop, template, projector_type, prefix, model_base, mode (written)
#   sample_size (set to 0 by the *_HN0 presets)
#   max_len (set to 1536 by the opt_* presets)
#   learning_rate (set by the nemotron* presets)
# Arguments:
#   $1 - preset name
# Outputs:
#   An error message on stderr when the preset name is not recognised.
# Returns:
#   0 on success, 1 for an unknown preset name.
#######################################
configure_model() {
  case "$1" in
    # ---- Vicuna 7B ---------------------------------------------------------
    vicuna)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-vicuna-7b-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_noise_ND)
      use_hop=1
      template="noise-ND"
      projector_type="linear"
      prefix="llaga-vicuna-7b-noise-ND-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-vicuna-7b-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-vicuna-7b-anti-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_rand_ND)
      use_hop=1
      template="rand-ND"
      projector_type="linear"
      prefix="llaga-vicuna-7b-rand-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_2layer)
      use_hop=1
      template="ND"
      projector_type="2-layer-mlp"
      prefix="llaga-vicuna-7b-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_4hop)
      use_hop=4
      template="HO"
      projector_type="linear"
      prefix="llaga-vicuna-7b-${emb}-${use_hop}-hop-token-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_anti_HO)
      use_hop=4
      template="anti-HO"
      projector_type="linear"
      prefix="llaga-vicuna-7b-anti-${emb}-${use_hop}-hop-token-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;
    vicuna_4hop_2layer)
      use_hop=4
      template="HO"
      projector_type="2-layer-mlp"
      prefix="llaga-vicuna-7b-${emb}-${use_hop}-hop-token-${projector_type}-projector"
      model_base=lmsys/vicuna-7b-v1.5-16k
      mode="v1"
      ;;

    # ---- Llama 2 (7B / 13B) ------------------------------------------------
    llama)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-llama-2-7b-hf-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-7b-hf
      mode="llaga_llama_2"
      ;;
    llama_13b)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-llama-2-13b-hf-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-13b-hf
      mode="llaga_llama_2"
      ;;
    llama_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-llama-2-7b-hf-anti-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-7b-hf
      mode="llaga_llama_2"
      ;;
    llama_13b_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-llama-2-13b-hf-anti-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-13b-hf
      mode="llaga_llama_2"
      ;;
    llama_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-llama-2-7b-hf-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-7b-hf
      mode="llaga_llama_2"
      ;;
    llama_13b_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-llama-2-13b-hf-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-2-13b-hf
      mode="llaga_llama_2"
      ;;
    llama_4hop)
      use_hop=4
      template="HO"
      projector_type="linear"
      prefix="llaga-llama-2-7b-hf-${emb}-${use_hop}-hop-token-${projector_type}-projector"
      model_base=meta-llama/Llama-2-7b-hf
      mode="llaga_llama_2"
      ;;

    # ---- Llama 3 (8B / 1B / 3B) --------------------------------------------
    # NOTE(review): the llama3*_anti_ND prefixes have no '-' between "anti"
    # and the embedding name, unlike every other anti-ND preset. Kept as-is so
    # existing checkpoint paths do not change; confirm whether intentional.
    llama3)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-llama-3-8B-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.1-8B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-llama-3-8B-anti${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.1-8B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-llama-3-8B-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.1-8B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_1b)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-llama-3-1B-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-1B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_1b_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-llama-3-1B-anti${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-1B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_1b_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-llama-3-1B-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-1B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_3b)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-llama-3-3B-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-3B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_3b_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-llama-3-3B-anti${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-3B-Instruct
      mode="llaga_llama_3"
      ;;
    llama3_3b_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-llama-3-3B-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=meta-llama/Llama-3.2-3B-Instruct
      mode="llaga_llama_3"
      ;;

    # ---- OPT 2.7B (shorter context: max_len is lowered to 1536) ------------
    opt_2.7b)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-opt-2.7b-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=facebook/opt-2.7b
      max_len=1536
      mode="v1"
      ;;
    opt_2.7b_4hop)
      use_hop=4
      template="HO"
      projector_type="linear"
      prefix="llaga-opt-2.7b-${emb}-${use_hop}-hop-token-${projector_type}-only-train-pretrain"
      model_base=facebook/opt-2.7b
      max_len=1536
      mode="v1"
      ;;

    # ---- Qwen 2.5 7B -------------------------------------------------------
    qwen)
      use_hop=1
      template="ND"
      projector_type="linear"
      prefix="llaga-qwen2.5-7b-instruct-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=Qwen/Qwen2.5-7B-Instruct
      mode="llaga_qwen"
      ;;
    qwen_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      prefix="llaga-qwen2.5-7b-instruct-anti-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=Qwen/Qwen2.5-7B-Instruct
      mode="llaga_qwen"
      ;;
    qwen_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      prefix="llaga-qwen2.5-7b-instruct-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=Qwen/Qwen2.5-7B-Instruct
      mode="llaga_qwen"
      ;;

    # ---- Nemotron 7B -------------------------------------------------------
    # learning_rate is assigned explicitly per preset; the value currently
    # equals the global default.
    nemotron)
      use_hop=1
      template="ND"
      projector_type="linear"
      learning_rate=2e-3
      prefix="llaga-nemotron-7b-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=nvidia/OpenReasoning-Nemotron-7B
      mode="llaga_nemotron"
      ;;
    nemotron_anti_ND)
      use_hop=1
      template="anti-ND"
      projector_type="linear"
      learning_rate=2e-3
      prefix="llaga-nemotron-7b-anti-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=nvidia/OpenReasoning-Nemotron-7B
      mode="llaga_nemotron"
      ;;
    nemotron_HN0)
      use_hop=0
      sample_size=0
      template="HN"
      projector_type="linear"
      learning_rate=2e-3
      prefix="llaga-nemotron-7b-HN-${emb}-${use_hop}-${sample_size}-${projector_type}-projector"
      model_base=nvidia/OpenReasoning-Nemotron-7B
      mode="llaga_nemotron"
      ;;

    *)
      echo "error: unknown model preset '$1'" >&2
      return 1
      ;;
  esac
}

# Stop here on an unknown preset instead of launching training with empty
# settings.
if ! configure_model "${model}"; then
  exit 1
fi



# Refuse to launch when the model selection left a required setting empty
# (this happens when the model name matched no preset); otherwise the training
# command would be started with missing argument values.
for required_var in model_base mode prefix template projector_type use_hop; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "error: ${required_var} is empty; unsupported model '${model}'?" >&2
    exit 1
  fi
done

echo "PREFIX:  ${prefix}"

wandb offline

# Build the training command once, so the copy that is printed and the copy
# that is executed cannot drift apart. Every expansion is quoted so each value
# stays a single argument.
train_cmd=(
  python train/train_mem.py
  --model_name_or_path "${model_base}"
  --version "${mode}"
  --cache_dir /data/haotian/.cache
  --pretrained_embedding_type "${emb}"
  --tune_mm_mlp_adapter True
  --mm_use_graph_start_end False
  --mm_use_graph_patch_token False
  --bf16 True
  --output_dir "./checkpoints/${dataset}/${prefix}_${task}_${seed}"
  --num_train_epochs 1
  --per_device_train_batch_size "${bs}"
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 1
  --evaluation_strategy "no"
  --save_strategy "epoch"
  --learning_rate "${learning_rate}"
  --weight_decay 0.
  --warmup_ratio 0.03
  --lr_scheduler_type "cosine"
  --logging_steps 1
  --tf32 True
  --model_max_length "${max_len}"
  --gradient_checkpointing True
  --lazy_preprocess True
  --report_to wandb
  --use_hop "${use_hop}"
  --sample_neighbor_size "${sample_size}"
  --mm_projector_type "${projector_type}"
  --use_task "${task}"
  --use_dataset "${dataset}"
  --use_seed "${seed}"
  --template "${template}"
)

# Print the command on one line (words joined by single spaces), then run it.
# The script's exit status is that of the training process.
echo "${train_cmd[*]}"
"${train_cmd[@]}"