### model
# Base checkpoint to fine-tune. The path is absolute (rooted at `/`), so it
# does not depend on the directory the run is launched from.
model_name_or_path: /Meta-Llama-3-8B
# NOTE(review): presumably allows the loader to execute custom code shipped
# with the checkpoint; confirm this is actually required for this model.
trust_remote_code: true

### method
# Supervised fine-tuning (`sft`) with training enabled, using LoRA adapters.
stage: sft
do_train: true
finetuning_type: lora
# LoRA hyperparameters. With these values alpha / rank = 32 / 16 = 2.
lora_rank: 16
lora_alpha: 32
lora_dropout: 0.05
# Comma-separated list of module names (no spaces) that receive adapters.
# NOTE(review): `o_proj` and `gate_proj` are not listed; confirm that leaving
# them out is deliberate.
lora_target: q_proj,k_proj,v_proj,down_proj,up_proj

### dataset
# Name of the dataset to train on.
dataset: commonsense
# NOTE(review): "commmonsense" is spelled with three m's. Verify that this
# matches the directory name on disk before launching a run.
dataset_dir: /commmonsense-170k
# Prompt template name; `llama3` pairs with the Llama 3 checkpoint configured
# under `model_name_or_path`.
template: llama3
# NOTE(review): presumably the maximum sequence length per example; confirm
# that 256 is long enough for this dataset.
cutoff_len: 256
# Disabled. NOTE(review): presumably caps the number of examples used when
# uncommented.
#max_samples: 34084
# NOTE(review): presumably forces the preprocessed dataset to be rebuilt
# instead of reusing a cached copy; confirm.
overwrite_cache: true
# Worker counts for preprocessing and for data loading, respectively.
preprocessing_num_workers: 16
dataloader_num_workers: 4

### output
# Relative path: resolved against the directory the run is launched from.
# NOTE(review): "l2ora" may be a typo for "lora"; confirm the intended name.
output_dir: saves/l2ora_commonsense
# Log every 10 steps; write a checkpoint every 40000 steps.
logging_steps: 10
save_steps: 40000
plot_loss: true
# Existing contents of `output_dir` may be overwritten by a new run.
overwrite_output_dir: true
# NOTE(review): `false` presumably means checkpoints also carry the state
# needed to resume training, not only the model weights; confirm.
save_only_model: false

### train
per_device_train_batch_size: 1
# Disabled: with this line commented out, the consuming tool's default
# gradient accumulation setting applies.
#gradient_accumulation_steps: 8
# Keep the decimal point and the signed exponent: some YAML 1.1 parsers read
# forms such as `1e-5` as a string rather than a float.
learning_rate: 1.5e-5
num_train_epochs: 1
# Cosine schedule with a warmup ratio of 0.1.
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
# NOTE(review): if the unit is seconds, this is roughly 5.7 years, i.e. the
# distributed timeout is effectively disabled; confirm the unit and intent.
ddp_timeout: 180000000

### eval
# Disabled: no separate evaluation dataset is configured here.
# eval_dataset: alpaca_en_demo
# NOTE(review): presumably the fraction of the training data held out for
# validation (0.001 = 0.1%); confirm.
val_size: 0.001
per_device_eval_batch_size: 1
# Evaluate on a step schedule, every 500 steps.
eval_strategy: steps
eval_steps: 500

