training_config:
  # Run identity: experiment-tracker names, RNG seed, and output locations.
  wandb_project: "uniworld"
  wandb_name: "flux_kontext_qwenvl_stage2_1024_adamw"
  seed: 203
  output_dir: ./checkpoints/flux_kontext_qwenvl_stage2_1024_adamw
  logging_dir: ./logs/flux_kontext_qwenvl_stage2_1024_adamw
  max_train_steps: 200000

  # Optimizer hyperparameters.
  optimizer: "adamw"
  learning_rate: 1.0e-6
  adam_beta1: 0.9
  adam_beta2: 0.99
  # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-8` as the *string* "1e-8", not as a float.
  adam_epsilon: 1.0e-8
  adam_weight_decay: 0.0

  mixed_precision: "bf16"
  report_to: "wandb"
  gradient_checkpointing: true
  num_train_epochs: 1

  # Learning-rate schedule: constant, no warmup.
  # NOTE(review): lr_num_cycles / lr_power presumably only matter for
  # non-constant schedulers -- confirm against the training script.
  lr_scheduler: "constant"
  lr_warmup_steps: 0
  lr_num_cycles: 1
  lr_power: 1.0

  # Explicit null (same parsed value as the previous bare `key:`); set to a
  # checkpoint reference to resume a run.
  resume_from_checkpoint: null

  # Step intervals for validation and checkpoint saving.
  validation_steps: 2000
  checkpointing_steps: 5000

  drop_t5_rate: 1.0
  discrete_timestep: false
  mask_weight_type: 'log'
  gradient_accumulation_steps: 1

  # Optional EMA settings, disabled in this config.
  # ema_deepspeed_config_file: scripts/accelerate_configs/zero3.json
  # ema_decay: 0.999

model_config:
  # Pretrained weights (paths are relative, not absolute).
  pretrained_lvlm_name_or_path: checkpoints/UniWorld-Qwen2.5-VL-7B-Instruct-FLUX.1-dev
  # Optional EMA copy of the LVLM weights, disabled in this config.
  # ema_pretrained_lvlm_name_or_path: /opt/dlami/nvme/wyh/code/UniWorld-V1/checkpoints/UniWorld-Qwen2.5-VL-7B-Instruct-FLUX.1-dev
  pretrained_denoiser_name_or_path: checkpoints/FLUX.1-Kontext-dev
  guidance_scale: 1.0

  # Projector ("mlp2") settings.
  # NOTE(review): exact interaction of only_tune_mlp2 / with_tune_mlp2 is
  # defined by the training code, which is not visible here.
  only_tune_mlp2: false
  with_tune_mlp2: true
  pretrained_mlp2_path: checkpoints/denoise_projector.bin

  joint_ref_feature: true
  only_use_t5: false

  # Every index from 0 through 56 inclusive (57 entries).
  flux_train_layer_idx: [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56]

  # Precision and compilation toggles.
  vae_fp32: true
  compile_flux: false
  compile_qwen2p5vl: true
  only_tune_image_branch: true

dataset_config:
  # Preprocessing and data-loading settings.
  padding_side: 'left'
  ocr_enhancer: false
  dataset_type: qwen2p5vl
  # min_pixels and max_pixels are identical here (200704 = 448 * 448).
  min_pixels: 200704
  max_pixels: 200704
  data_txt: gpt-image-edit/data.txt
  batch_size: 1
  num_workers: 16
  height: 1024
  width: 1024
  pin_memory: true

  # Validation cases: each `*_prompt` key has a matching `*_path` image key.
  # Long prompts are folded across lines; every line break below folds to a
  # single space, so the parsed strings are unchanged.
  # NOTE(review): `<image>` presumably marks where the image at `*_path` is
  # inserted into the prompt -- confirm against the dataset code.

  # Double-quoted because this prompt needs the `\n` escape before `<image>`.
  validation_cannyt2i_prompt: "Render an image where fine details and textures are filled in
    based on the provided canny lines, influenced by 'white and black dogs on snow covered
    ground during daytime'.\n<image>"
  validation_cannyt2i_path: assets/canny.jpg

  validation_poset2i_prompt: >-
    Create a person image that conforms to the input pose, with realistic anatomy and
    appearance related to 'Two individuals sit on a wooden bench in a park, with one person
    stretching their arms above their head and the other engrossed in their mobile device.'.
    <image>
  validation_poset2i_path: assets/pose.jpg

  validation_TRANSFERit2i_prompt: 'Convert an image to Ghibli style. <image>'
  validation_TRANSFERit2i_path: assets/bus.png

  validation_TRYONit2i_prompt: >-
    Integrate the striped cotton sweater into the person's overall look, making it appear
    natural and stylish. <image>
  validation_TRYONit2i_path: assets/extract_dst.jpg

  validation_REPLACEit2i_prompt: >-
    replace motorcycle located in the lower center region of the image with a black bicycle
    <image>
  validation_REPLACEit2i_path: assets/replace_src.png


  
