#!/bin/bash
# Launch model.py with a fixed set of training flags.
#
# The flags live in an array so each group can carry its own comment; the
# order and spelling of every argument passed to model.py is unchanged.
# model.py and the --data_path value are relative paths, so they resolve
# against the directory this script is run from.
# NOTE(review): the flag names look like Hugging Face TrainingArguments
# fields; confirm against the argument parser in model.py.
train_args=(
  # Base checkpoint and hub authentication.
  --model_name_or_path meta-llama/Llama-2-7b-hf
  --use_auth_token True

  # Input CSV and output directory.
  --data_path data/mimic3/output.csv
  --output_dir /output/class_router-mimic3

  # Batch size 1 with 64 accumulation steps, 3 epochs, LR 1e-4 with 100
  # warmup steps.
  --per_device_train_batch_size 1
  --gradient_accumulation_steps 64
  --num_train_epochs 3
  --learning_rate 1e-4
  --warmup_steps 100

  # Logging interval and save cadence.
  --logging_steps 10
  --save_strategy epoch

  # bfloat16 flag.
  --bf16 True

  # Log destination and reporting backend.
  --logging_dir /denseMOE/logs
  --report_to tensorboard
  --logging_first_step True

  # Gradient checkpointing and gradient-norm limit.
  --gradient_checkpointing True
  --max_grad_norm 0.3
)

python model.py "${train_args[@]}"