# Run identity: which dataset entry to train on, where checkpoints are
# written, and the name this run is reported under.
# NOTE(review): YOUR_CKPT_OUTPUT_DIR reads like a placeholder that must be
# replaced with a real path before launching — confirm.
dataset: GRAMMAR
output_dir: YOUR_CKPT_OUTPUT_DIR
run_name: REVISUAL_COLD_START
### model
# Base checkpoint that this run fine-tunes; custom code shipped with the
# checkpoint is allowed to run when it is loaded.
trust_remote_code: true
model_name_or_path: Qwen/Qwen2.5-VL-3B-Instruct
### method
# Full-parameter supervised fine-tuning, training enabled.
finetuning_type: full
stage: sft
do_train: true
# DeepSpeed config in use. Judging by the file names, the active one is a
# ZeRO-2 setup and the disabled alternative is ZeRO-3 with offload
# (TODO confirm against the JSON files).
deepspeed: cold_start/examples/deepspeed/ds_z2_config.json
# deepspeed: examples/deepspeed/ds_z3_offload_config.json

### dataset
# The active `dataset:` key lives at the top of this file; the line below is
# a disabled alternative kept for reference.
# dataset: bespoke_cot
template: qwen2_vl
# NOTE(review): GRAMMAR_DIR looks like a placeholder for the dataset
# directory — confirm it is substituted before launching.
dataset_dir: GRAMMAR_DIR
cutoff_len: 32768
# NOTE(review): 1024 preprocessing workers is unusually high; confirm the
# host has the cores and memory for it.
preprocessing_num_workers: 1024
overwrite_cache: true

### output
# Logging and reporting: log every step, plot the loss curve, report to W&B.
logging_steps: 1
plot_loss: true
report_to: wandb
# Checkpointing: step-based, every 150 steps, with a limit of 10 and full
# (not model-only) saves. An existing output directory is not overwritten.
save_strategy: steps
save_steps: 150
save_total_limit: 10
save_only_model: false
overwrite_output_dir: false


### train
# Batch shape: 4 samples x 4 accumulation steps = 16 samples per device for
# each optimizer step.
gradient_accumulation_steps: 4
per_device_train_batch_size: 4
num_train_epochs: 5
# Optimisation schedule. Keep the decimal point and the signed exponent in
# the learning rate: some YAML 1.1 loaders read a bare `1e-5` as a string.
learning_rate: 1.0e-5
lr_scheduler_type: cosine
warmup_ratio: 0.05
# weight_decay: 1.0e-4
# Precision and distributed settings.
bf16: true
ddp_timeout: 540000000
# resume_from_checkpoint: true  # leave commented out for the initial run

### eval
# Evaluation is switched off: no validation split and strategy 'no'.
# The quotes are required — an unquoted `no` parses as the boolean false.
eval_strategy: 'no'
val_size: 0
# Presumably unused while eval_strategy is 'no' — TODO confirm.
eval_steps: 10000000
per_device_eval_batch_size: 1

### tokenizer
# Extra reasoning/answer tag tokens as one comma-separated string, with
# vocabulary resizing enabled alongside them. The value is quoted so the
# angle brackets and commas are unambiguously a single string scalar.
resize_vocab: true
add_tokens: '<think>,</think>,<answer>,</answer>'
skip_special_tokens: false

