# Project-specific run settings.
# See ./resa/utils/constant.py before changing these values
# (presumably where the accepted names are defined -- confirm there).
model_post_train_type: sft
model_post_train_dataset_name: deepscaler
trace_free: false  # for trace-based ablation

# Model configs from trl
# Base model and attention backend.
model_name_or_path: DeepSeek-R1-Distill-Qwen-1.5B
attn_implementation: flash_attention_2

# PEFT / LoRA adapter settings (lora_alpha / lora_r = 128 / 32 = 4).
use_peft: true
lora_r: 32
lora_alpha: 128
lora_dropout: 0.05
# Modules that receive LoRA adapters; by name, the first four look like the
# attention projections and the last three the MLP projections.
lora_target_modules:
- q_proj
- k_proj
- v_proj
- o_proj
- down_proj
- up_proj
- gate_proj

# SFT trainer config from trl

# Data handling.
dataset_text_field: text
max_seq_length: 2048
packing: false

# Precision and memory.
bf16: true
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false

# Optimisation schedule.
# Per-device batch is 8 with no gradient accumulation (8 * 1 samples per
# optimiser step on each device).
per_device_train_batch_size: 8
gradient_accumulation_steps: 1
# Keep the explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a
# bare `1e-05` as a string, while `1.0e-05` is read as a float everywhere.
learning_rate: 1.0e-05
warmup_ratio: 0.05
max_grad_norm: 1
num_train_epochs: 3
seed: 42

# Evaluation.
do_eval: true

# Logging: one log entry per step, reported to Weights & Biases.
logging_strategy: steps
logging_steps: 1
log_level: info
report_to:
- wandb

# Checkpointing: save every 500 steps, keeping at most 100 checkpoints.
overwrite_output_dir: true
save_strategy: steps
save_steps: 500
save_total_limit: 100

# Hugging Face Hub upload.
push_to_hub: false  # set as false on purpose
hub_strategy: every_save
hub_private_repo: true
hub_model_id: upup-ashton-wang  # TODO

