# Fine-tuning configuration for Qwen-VL-Chat (run name: silkie-paperconfig).
# NOTE(review): most keys look like Hugging Face TrainingArguments fields and
# a few (fix_vit, use_lora, model_max_length) look script-specific -- confirm
# against the training script that loads this file.

# Model checkpoint and output location.
model_name_or_path: "/models/Qwen/Qwen-VL-Chat"
output_dir: null  # to be set by the script
# NOTE(review): presumably keeps the vision transformer frozen -- confirm.
fix_vit: true

# Training length and batch sizing.
# Per-device batch of 2 with 8 accumulation steps gives 16 samples per device
# per optimizer step.
num_train_epochs: 3
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 8

# Evaluation and checkpointing are both step-based; checkpoints (every 100
# steps) are written five times as often as evaluation runs (every 500 steps).
evaluation_strategy: "steps"
eval_steps: 500
save_strategy: "steps"
save_steps: 100
save_total_limit: 10

# Optimizer and learning-rate schedule.
# The learning rate is written with an explicit decimal point on purpose:
# YAML 1.1 loaders (e.g. PyYAML) read a bare `1e-5` as the string "1e-5"
# rather than a float, while `1.0e-5` is a float under both YAML 1.1 and 1.2.
learning_rate: 1.0e-5
weight_decay: 0.05
adam_beta2: 0.98
warmup_ratio: 0.1
lr_scheduler_type: "cosine"

# Logging and experiment tracking.
logging_steps: 10
report_to: "wandb"
run_name: "silkie-paperconfig"

# Sequence length, memory, and adapter settings.
model_max_length: 2048
gradient_checkpointing: true
use_lora: true

# Numeric precision: fp16 is the only reduced-precision mode enabled here;
# bf16 and tf32 are explicitly off.
bf16: false
tf32: false
fp16: true

logging_first_step: true
# NOTE(review): presumably required so extra dataset columns reach the data
# collator -- confirm against the script's dataset/collator code.
remove_unused_columns: false
