name: 8101_potter_lift_racs
manual_seed: 0
mixed_precision: fp16
gradient_accumulation_steps: 1

# dataset and data loader settings
datasets:
  train:
    name: LoraDataset
    concept_list: datasets/data_cfgs/MixofShow/single-concept/characters/real/potter.json
    use_caption: true
    use_mask: true
    instance_transform:
      - { type: HumanResizeCropFinalV3, size: 512, crop_p: 0.5 }
      - { type: ToTensor }
      - { type: Normalize, mean: [ 0.5 ], std: [ 0.5 ] }
      - { type: ShuffleCaption, keep_token_num: 1 }
      - { type: EnhanceText, enhance_type: human }
    replace_mapping:
      <TOK>: <potter1> <potter2>
    batch_size_per_gpu: 2
    dataset_enlarge_ratio: 500

  val_vis:
    name: PromptDataset
    prompts: datasets/validation_prompts/single-concept/characters/test_man.txt
    num_samples_per_prompt: 8
    latent_size: [ 4,64,64 ]
    replace_mapping:
      <TOK>: <potter1> <potter2>
    batch_size_per_gpu: 4

models:
  pretrained_path: experiments/pretrained_models/chilloutmix
  enable_edlora: true  # true means ED-LoRA, false means vallina LoRA
  finetune_cfg:
    text_embedding:
      enable_tuning: false
      lr: !!float 1e-3
    text_encoder:
      enable_tuning: true
      lora_cfg:
        rank: 4
        alpha: 1.0
        where: CLIPAttention
      lr: !!float 1e-5
    unet:
      enable_tuning: true
      lora_cfg:
        rank: 16
        alpha: 1.0
        where: Attention
      lr: !!float 5e-5
  new_concept_token: <potter1>+<potter2>
  initializer_token: <rand-0.013>+man
  noise_offset: 0.01
  attn_reg_weight: 0.01
  reg_full_identity: false
  use_mask_loss: true
  gradient_checkpoint: false
  enable_xformers: false

# path
path:
  pretrain_network: ~

# training settings
train:
  optim_g:
    lr: !!float 0.0 # no use since we define different component lr in model
    weight_decay: 0.01
    betas: [ 0.9, 0.999 ] # align with taming
    
  # dropkv  
  unet_kv_drop_rate: 0
  scheduler: linear
  emb_norm_threshold: !!float 5.5e-1

# validation settings
val:
  val_during_save: true
  compose_visualize: true
  alpha_list: [0.6, 0.7, 1.0] # 0 means only visualize embedding (without lora weight)
  sample:
    num_inference_steps: 50
    guidance_scale: 7.5

# logging settings
logger:
  print_freq: 10
  save_checkpoint_freq: !!float 10000
