# Optimisation, logging and checkpointing settings for this run.
training_config:
  # --- Experiment tracking / output locations ---
  wandb_project: "uniworld"
  wandb_name: "flux_qwen2p5vl_7b_vlm_stage1_512_adamw"
  report_to: "wandb"
  seed: 203
  output_dir: "./checkpoints/flux_qwen2p5vl_7b_vlm_stage1_512_adamw"
  logging_dir: "./logs/flux_qwen2p5vl_7b_vlm_stage1_512_adamw"

  # --- Run length ---
  # NOTE(review): both a step budget and an epoch count are set; which one
  # terminates training depends on the training script -- confirm.
  max_train_steps: 500000
  num_train_epochs: 1
  gradient_accumulation_steps: 1

  # --- Optimizer ---
  optimizer: "adamw"
  # Exponent-form floats are written with a decimal point in the mantissa:
  # YAML 1.1 loaders (e.g. PyYAML) resolve a bare `1e-3` as the *string*
  # "1e-3", whereas `1.0e-3` is a float under both YAML 1.1 and 1.2.
  learning_rate: 1.0e-3
  adam_beta1: 0.9
  adam_beta2: 0.99
  adam_epsilon: 1.0e-8
  adam_weight_decay: 0.0

  # --- Learning-rate schedule ---
  # Constant schedule with no warmup.
  # NOTE(review): lr_num_cycles / lr_power presumably only matter for
  # cosine-with-restarts / polynomial schedulers -- verify against the trainer.
  lr_scheduler: "constant"
  lr_warmup_steps: 0
  lr_num_cycles: 1
  lr_power: 1.0

  # --- Precision / memory ---
  mixed_precision: "bf16"
  gradient_checkpointing: false

  # --- Validation and checkpointing cadence (in steps) ---
  resume_from_checkpoint: null
  validation_steps: 1000
  checkpointing_steps: 2000

  # --- Objective-specific switches ---
  # NOTE(review): semantics of the three keys below are defined by the
  # training script; values are kept exactly as configured.
  drop_t5_rate: 1.0
  discrete_timestep: false
  mask_weight_type: "log"

  # --- EMA ---
  ema_deepspeed_config_file: "scripts/accelerate_configs/zero3.json"
  ema_decay: 0.999

# Checkpoint locations and trainable-module selection.
model_config:
  # --- Pretrained weights ---
  # The LVLM and its EMA copy are initialised from the same checkpoint path.
  pretrained_lvlm_name_or_path: "/mnt/data/checkpoints/UniVA/UniVA-Qwen2.5-VL-7B-Instruct-FLUX.1-dev-fp32"
  ema_pretrained_lvlm_name_or_path: "/mnt/data/checkpoints/UniVA/UniVA-Qwen2.5-VL-7B-Instruct-FLUX.1-dev-fp32"
  pretrained_denoiser_name_or_path: "/mnt/data/checkpoints/black-forest-labs/FLUX.1-dev/"
  guidance_scale: 1.0

  # --- Trainable-module selection ---
  # `only_tune_mlp2` is the only tuning flag enabled in this config; every
  # other only_*/with_* flag is false and no pretrained MLP weights are given.
  only_tune_mlp2: true
  with_tune_mlp2: false
  pretrained_mlp2_path: null

  only_tune_mlp3: false
  with_tune_mlp3: false
  pretrained_mlp3_path: null

  only_tune_siglip_mlp: false
  with_tune_siglip_mlp: false
  pretrained_siglip_mlp_path: null

  # --- Remaining feature switches (all disabled) ---
  joint_ref_feature: false
  only_use_t5: false
  only_tune_image_branch: false

# Data loading settings plus the fixed samples used for validation.
dataset_config:
  # --- Preprocessing ---
  padding_side: "left"
  ocr_enhancer: false
  dataset_type: "qwen2p5vl"
  # min_pixels == max_pixels, i.e. a fixed budget of 200704 = 448 * 448.
  # NOTE(review): height/width below are 512 -- confirm the two are meant
  # to differ.
  min_pixels: 200704
  max_pixels: 200704
  height: 512
  width: 512

  # --- Data source and loader ---
  data_txt: "/mnt/data/lb/Remake/UniWorld/data.txt"
  batch_size: 1
  num_workers: 16
  pin_memory: true

  # --- Validation samples ---
  # Each sample is a prompt/image-path pair; every prompt carries an
  # `<image>` placeholder. The canny prompt puts the placeholder after a
  # newline, the other prompts after a space.
  validation_cannyt2i_prompt: "Render an image where fine details and textures are filled in based on the provided canny lines, influenced by 'white and black dogs on snow covered ground during daytime'.\n<image>"
  validation_cannyt2i_path: "assets/canny.jpg"

  validation_poset2i_prompt: "Create a person image that conforms to the input pose, with realistic anatomy and appearance related to 'Two individuals sit on a wooden bench in a park, with one person stretching their arms above their head and the other engrossed in their mobile device.'. <image>"
  validation_poset2i_path: "assets/pose.jpg"

  validation_TRANSFERit2i_prompt: "Convert an image to Ghibli style. <image>"
  validation_TRANSFERit2i_path: "assets/bus.png"

  validation_TRYONit2i_prompt: "Integrate the striped cotton sweater into the person's overall look, making it appear natural and stylish. <image>"
  validation_TRYONit2i_path: "assets/extract_dst.jpg"

  validation_REPLACEit2i_prompt: "replace motorcycle located in the lower center region of the image with a black bicycle <image>"
  validation_REPLACEit2i_path: "assets/replace_src.png"

  
