## Data
# Dataset file locations (relative paths) and sampling ratios.
train_data_path: data/source_train_data.json
eval_data_path: data/train_LLAVA_test.json
# Explicit null: no value is configured here.
# NOTE(review): presumably the output directory for saved artifacts --
# confirm how the consumer handles a null value.
save_dir: null
# NOTE(review): presumably fractions of image-heavy / text-heavy samples;
# the exact meaning is defined by the consumer -- confirm.
image_heavy_ratio: 0.25
text_heavy_ratio: 0.25

## Log Info
# All logging identifiers are explicitly null (no value configured).
# NOTE(review): key names suggest Weights & Biases settings -- confirm
# whether the consumer expects them to be filled in per run or falls back
# to defaults when they are null.
wandb_project: null
wandb_entity: null
wandb_run_name: null
wandb_run_id: null

## Model
# Model identifier string; the consumer decides how it is resolved.
model: llava-1.5-7b
## Finetune Type
finetune_type: full
## Consistency Type
# NOTE(review): "KL" presumably selects a KL-divergence consistency loss --
# confirm the set of accepted values in the consumer.
consistency_type: KL
# NOTE(review): presumably the weight of the consistency term in the total
# loss -- confirm.
consistency_loss_weight: 0.01
# Integer 1 as written; NOTE(review): presumably a softmax temperature used
# by the consistency loss -- confirm.
temperature: 1

## Train model
# Path to a DeepSpeed JSON configuration file.
# NOTE(review): the file name suggests ZeRO stage 3 with offload -- confirm.
deepspeed_config: "modules/models/deepspeed/zero3_offload.json"
seed: 2024
device: cuda
per_device_batch_size: 8
gradient_accumulation_steps: 1
epochs: 1
# Keep the mantissa dot and the signed exponent: some YAML 1.1 loaders only
# resolve this spelling as a float rather than a string.
learning_rate: 2.0e-5

lr_scheduler_type: cosine
warmup_ratio: 0.03
adam_epsilon: 1.0e-6

weight_decay: 0.0001
logging_steps: 1
# Precision flags: bf16 and tf32 are enabled, fp16 is disabled.
fp16: false
bf16: true
tf32: true

gradient_checkpointing: true
# Empty string (not null).
# NOTE(review): presumably means "do not resume" -- confirm in the consumer.
resume_from_checkpoint: ""

## Evaluation
# NOTE(review): presumably the step interval between save/eval passes --
# confirm.
save_eval_steps: 600
# NOTE(review): presumably the maximum number of checkpoints kept -- confirm.
save_total_limit: 3
# NOTE(review): unit (epochs vs. steps) is not stated here -- confirm.
val_period: 1
# NOTE(review): presumably the generation length cap used during
# evaluation -- confirm.
max_new_tokens: 100

