---
# Name of this configuration.
name: search-run_optimal_control_bs_eta

# Seed settings for the initial latent.
init_latent:
  # NOTE(review): presumably selects between the [Random] and
  # [Deterministic] key groups below; confirm against the consumer.
  random: false

  # [Random]
  # NOTE(review): seed_st / seed_ed look like the bounds of a seed range;
  # whether the upper bound is inclusive cannot be told from this file.
  seed_st: 0
  seed_ed: 1023

  # [Deterministic]
  # NOTE(review): seed_list holds a single scalar here, not a YAML sequence.
  seed_list: 0
  seed_auto_increment: true

# Seed settings for eps.
eps:
  # NOTE(review): presumably selects between the [Random] and
  # [Deterministic] key groups below; confirm against the consumer.
  random: false

  # [Random]
  # NOTE(review): seed_st / seed_ed look like the bounds of a seed range;
  # whether the upper bound is inclusive cannot be told from this file.
  seed_st: 3072
  seed_ed: 4095

  # [Deterministic]
  # NOTE(review): seed_list holds a single scalar here, not a YAML sequence.
  seed_list: 3072
  seed_auto_increment: false

# Settings for eta.
eta:
  # NOTE(review): presumably selects between the [Random] and
  # [Deterministic] key groups below; confirm against the consumer.
  random: true

  # [Random]
  # NOTE(review): looks like the lower / upper bound used when eta is drawn
  # at random; the sampling distribution is not visible in this file.
  eta_low: 0.0
  eta_high: 1.0

  # [Deterministic]
  # Written as `null`, matching the other unset values in this file. A bare
  # `None` is not a YAML null: it is loaded as the string "None".
  default_eta: null

# Prompt selection settings.
prompt_list:
  num_prompt: 4

  prompt_manager_dict:
    prompt_manager_type: HumanPreferenceDataset_v2
    cfg_yaml_path: null

    # [HumanPreferenceDataset v2]
    category_name_list: [anime, concept-art, paintings, photo]

# Task settings.
task:
  # Number of samples per prompt.
  num_sample_per_prompt: 2

# Sampling settings.
sample:
  # Prompts (null means the value is left unset).
  prompt_2: null
  negative_prompt: 'low quality, blurry, ugly, oversaturated'
  negative_prompt_2: null

  # Output size.
  height: 512
  width: 512

  # NOTE(review): presumably the pixel-to-latent down-sampling factor
  # (512 / 8 = 64); confirm against the consumer.
  down_sampling_ratio: 8

  # Sampler settings.
  num_inference_step: 5
  guidance_scale: 6.5

# Promptist settings; its own configuration lives in the YAML file referenced
# by cfg_yaml_path.
promptist:
  enable: false

  cfg_yaml_path: ./config/model/promptist.yaml

# Golden-noise settings; its own configuration lives in the YAML file
# referenced by cfg_yaml_path.
golden_noise:
  enable: false

  cfg_yaml_path: ./config/model/golden_noise.yaml

# Output settings.
save:
  # Root path for saved output (relative path).
  save_root_path: ./tmp/run_optimal_control_bs_eta

# Reward model settings.
reward_model:
  reward_model_type: hps_v2

  # Batch sizes for the three calculation stages.
  cal_dynamics_batch_size: 40
  cal_intermediate_reward_batch_size: 40
  cal_final_reward_batch_size: 40

  # [Reward shaping]
  reward_shaping_policy: 'latent_reward'

  cal_intermediate_reward_policy: 'immediate_posterior_mean'

  # [Look ahead]
  num_look_ahead_step: 2

  # [Discount]
  gamma: 0.99

# LRU cache settings.
lru_cache:
  # NOTE(review): presumably the maximum number of entries kept resident on
  # the GPU; confirm against the consumer.
  num_gpu_resident_lim: 1000

# Beam search settings.
# NOTE(review): the top-level key and its first child are both named
# `beam_search`; the nesting is kept as-is because consumers address it by
# path.
beam_search:
  # [Beam search]
  beam_search:
    num_beam: 4
    num_candidate_per_beam: 2

  # [MDP modeling]
  # Alternative value kept for reference: "max_reward".
  mdp_modeling: "sparse_reward"
  # mdp_modeling: "max_reward"

  # [Expansion policy]
  expansion:
    expansion_action_sampling_policy: "uniform"

  # [NFE limit]
  nfe_limit:
    nfe_cal_dynamics_lim: 999
    # Written as `1.0e+9` rather than `1e9`: without a decimal point and a
    # signed exponent, YAML 1.1 loaders (e.g. PyYAML) resolve the scalar to
    # the string "1e9" instead of a float. This form is a float under both
    # YAML 1.1 and YAML 1.2.
    nfe_cal_intermediate_reward_lim: 1.0e+9
    nfe_cal_final_reward_lim: 1.0e+9

# Display toggles.
display:
  display_trajectory: false
  display_selected_node_depth: true
  display_cal_state_value: false
  display_reward_sum_to_leaf: false
  display_beta_mode_update: false
