# Dataset definitions, one entry per split. Each populated split names a
# class in `target` and a `params` mapping; presumably the loader builds the
# class from `params` - confirm against the config consumer.
dataset:
  train:
    target: KnobGen.data.MultiGen20M
    params:
      # The `{}` in the path is kept literally here; presumably the dataset
      # class substitutes a value into it - confirm against the loader.
      path_json: 'base_url/json_files/aesthetics_plus_all_group_{}_all.json'
      path_meta: 'base_url'
      resolution: 512
      none_loop: 0
      p_drop_text: 0.1
      p_drop_condition: 0.0
      conditions:
        - 'hed'
      prompt_json: 'base_url/json_files/llava_prompts.json'
      hints:
        - task: 'hed'
          path: 'base_url/conditions/'

  # Same dataset class as `train`, pointed at the `small_dataset` tree, with
  # p_drop_text set to 0.0 and an empty prompt_json.
  validation:
    # These two keys sit beside `target`/`params`, not inside `params`.
    num_inference_steps: 50
    guidance_scale: 8.0
    target: KnobGen.data.MultiGen20M
    params:
      path_json: 'small_dataset/json_files/aesthetics_plus_all_group_{}_all.json'
      path_meta: 'small_dataset'
      resolution: 512
      none_loop: 0
      p_drop_text: 0.0
      p_drop_condition: 0.0
      conditions:
        - 'hed'
      prompt_json: ''
      hints:
        - task: 'hed'
          path: 'small_dataset/conditions/'

  # No test split is configured; the value is an explicit null.
  test: null
# Model definition: `target` names a class and `params` is a mapping of
# settings for it (presumably its constructor arguments - confirm against
# the config consumer).
model:
  target: KnobGen.models.VisionAdapterTransformer2
  params:
    # Same value as hidden_channels below (1024).
    # NOTE(review): presumably the feature width of the image encoder named
    # in train.pretrained_image_encoder - confirm.
    in_channels_image: 1024
    # Same value as output_channels below (768).
    in_channels_text: 768
    # NOTE(review): presumably the text encoder's maximum token count -
    # confirm.
    max_position_embeddings_text: 77
    hidden_channels: 1024
    output_channels: 768
    number_heads: 8
    num_transformer_layers: 6
    # NOTE(review): presumably the number of image tokens fed to the adapter;
    # verify it agrees with what the image encoder emits.
    num_image_token: 256
# Training-run settings: pretrained weights, batch sizes, noise scheduler,
# run length, checkpoint/validation cadence and runtime switches.
train:
  # NOTE(review): `random_contion` looks like a misspelling of
  # `random_condition`. The key is kept exactly as-is because the consumer
  # presumably looks it up by this name - confirm before renaming.
  random_contion: true

  pretrained_model_path: 'ddpm_models'
  pretrained_image_encoder: 'openai/clip-vit-large-patch14'
  pretrained_adapter_sketch: 'TencentARC/t2iadapter_sketch_sd15v2'
  # Empty string here; presumably means "no checkpoint to resume" - confirm.
  checkpoint_path: ''

  is_debug: false
  global_seed: 42
  output_dir: 'output'
  num_workers: 16
  train_batch_size: 32
  valid_batch_size: 1

  # Presumably forwarded as keyword arguments to the noise scheduler -
  # confirm against the training script.
  noise_scheduler_kwargs:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: 'linear'
    steps_offset: 1
    clip_sample: false

  # The step-based limits are -1 while the epoch-based ones are positive.
  # NOTE(review): -1 presumably disables the step-based variant - confirm.
  max_train_epoch: 2000
  max_train_steps: -1
  scale_lr: false
  checkpointing_epochs: 20
  checkpointing_steps: -1

  validation_steps: 1000
  validation_steps_tuple: [1, 50, 100, 400, 750]

  gradient_checkpointing: false
  mixed_precision_training: false
  max_grad_norm: 1
  enable_xformers_memory_efficient_attention: false
# Optimizer and learning-rate schedule settings.
optimize:
  # Exponent-form floats are written with an explicit decimal point. YAML 1.1
  # loaders (PyYAML among them) only resolve a scalar as a float when the
  # mantissa contains a `.`, so a bare `1e-5` is loaded as the string "1e-5"
  # rather than a number. The `1.0e-5` form is a float under both YAML 1.1
  # and YAML 1.2 rules and has the same numeric value.
  learning_rate: 1.0e-5
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 1.0e-2
  adam_epsilon: 1.0e-8
  lr_warmup_steps: 500
  lr_scheduler: 'constant'
  gradient_accumulation_steps: 1
