# Name of this configuration.
# NOTE(review): how the consumer uses this identifier is not visible in this file.
name: search-optimal_control_mcts


# Seeding settings for the initial latent.
# NOTE(review): key meanings below are inferred from the key names and the
# section headers; confirm against the code that reads this config.
init_latent:
  root_path: ./tmp/init_latent_or_eps

  # Presumably selects between the [Random] and [Deterministic] groups below.
  random: false
  # random: true

  # ---------= [Random] =---------
  # Seed bounds (0 .. 1023) — TODO confirm whether the end is inclusive.
  seed_st: 0
  seed_ed: 1023

  # ---------= [Deterministic] =---------
  # NOTE(review): named "list" but holds a single scalar; verify the loader
  # accepts a bare value here.
  seed_list: 0
  seed_auto_increment: true

# Seeding settings for "eps".
# NOTE(review): presumably the noise injected during sampling; key meanings
# are inferred from names — confirm against the code that reads this config.
eps:
  # Same directory value as init_latent.root_path in this file.
  root_path: ./tmp/init_latent_or_eps

  # Presumably selects between the [Random] and [Deterministic] groups below.
  random: false
  # random: true

  # ---------= [Random] =---------
  # Seed bounds (1024 .. 2047); this does not overlap init_latent's 0 .. 1023.
  seed_st: 1024
  seed_ed: 2047

  # ---------= [Deterministic] =---------
  # NOTE(review): named "list" but holds a single scalar; verify the loader
  # accepts a bare value here.
  seed_list: 1024
  seed_auto_increment: true

# Sampling parameters: prompts, output size, step count and guidance scale.
sample:
  prompt: "A 3D illustrated chubby room with studio lighting."
  # Secondary prompts are explicitly unset.
  prompt_2: null
  negative_prompt: "low quality, blurry, ugly, oversaturated"
  negative_prompt_2: null

  height: 512
  width: 512

  # NOTE(review): presumably the pixel-to-latent spatial factor
  # (512 / 8 = 64) — confirm against the consumer.
  down_sampling_ratio: 8

  # The active step count is 8; the commented lines are alternative values
  # kept for quick switching.
  # num_inference_step: 5
  num_inference_step: 8
  # num_inference_step: 10
  # num_inference_step: 15
  # num_inference_step: 20
  # num_inference_step: 100
  guidance_scale: 6.5

# Task sizing.
task:
  # Active sample count; the commented line is an alternative value.
  num_sample: 2
  # num_sample: 5

  # Marked "discarded" by the author — presumably no longer read by the
  # consumer; confirm before removing.
  batch_size: 2  # discarded

# Output location.
# NOTE(review): presumably the root directory results are written under;
# what exactly is saved there is not visible in this file.
save:
  save_root_path: "./tmp/optimal_control_mcts"

# Reward model selection and reward-computation settings.
# NOTE(review): per-key meanings are inferred from the key names; verify
# against the code that reads this config.
reward_model:
  # Active model first; the commented line is the other value listed here.
  reward_model_type: "hps_v2"
  # reward_model_type: "color_channel_reward"

  # Batch sizes for the three computation kinds named in the keys.
  cal_dynamics_batch_size: 10
  cal_intermediate_reward_batch_size: 10
  cal_final_reward_batch_size: 10

  disable_intermediate_reward: false
  # disable_intermediate_reward: true

  # Active policy first; the commented lines are the alternative values.
  cal_intermediate_reward_policy: "immediate_posterior_mean"
  # cal_intermediate_reward_policy: "immediate_score_function"
  # cal_intermediate_reward_policy: "look_ahead"
  # cal_intermediate_reward_policy: "sequential"
  # cal_intermediate_reward_policy: "discount"

  # ---------= [look_ahead] =---------
  # Presumably only consulted when the policy above is "look_ahead".
  num_look_ahead_step: 2
  # num_look_ahead_step: 3

  # ---------= [discount] =---------
  # Presumably only consulted when the policy above is "discount".
  gamma: 0.99

  use_difference_reward: true
  # use_difference_reward: false

# Lower and upper bounds for "eta".
# NOTE(review): presumably the range actions are drawn from; whether the
# bounds are inclusive is not visible in this file.
action_space:
  eta_low: 0.0
  eta_high: 1.0

# LRU cache settings.
lru_cache:
  # NOTE(review): presumably the maximum number of cache entries kept
  # resident on the GPU — confirm the unit (entries vs. bytes) in the consumer.
  num_gpu_resident_lim: 1000

# Tree-search (mcts) settings.
# NOTE(review): per-key meanings are inferred from the key names and section
# headers; verify against the code that reads this config.
#
# Small tolerances are written as 1.0e-8 (with a decimal point) on purpose:
# YAML 1.1 resolvers such as PyYAML's read a bare `1e-8` as a string, not a
# float. Keep the decimal point when editing these values.
mcts:
  # ---------= [Upper Confidence Bound (UCB)] =---------
  exploration_coef: 1.0  # in range [1.0, 1.25]
  depth_coef: 1.25  # in range [1.0, 1.25]

  # ---------= [Expansion Policy] =---------
  expansion_action_sampling_policy: "uniform"
  # expansion_action_sampling_policy: "optimal_control"
  # Marked "discarded" by the author — presumably no longer read; confirm.
  expansion_default_action_list: null  # discarded
  expansion_enable_importance_sampling: false
  # expansion_enable_importance_sampling: true
  expansion_importance_sampling_J_star_scaling_factor: 0.5
  # expansion_importance_sampling_J_star_scaling_factor: 0.95
  expansion_importance_sampling_eps: 1.0e-8
  expansion_importance_sampling_verbose: true
  num_per_iteration_selection: 1
  per_iteration_expansion_lim: 2

  # ---------= [Simulation Policy] =---------
  simulation_action_sampling_policy: "uniform"
  # simulation_action_sampling_policy: "deterministic"
  # NOTE(review): named "list" but holds a single scalar; verify the loader
  # accepts a bare value here.
  simulation_default_action_list: 0.0

  # ---------= [NFE Limit] =---------
  # NOTE(review): "NFE" presumably means number of function evaluations.
  nfe_cal_dynamics_lim: 40
  nfe_cal_intermediate_reward_lim: 40
  nfe_cal_final_reward_lim: 40

  # ---------= [Optimal Control] =---------
  optimal_control_online_update: false
  # optimal_control_online_update: true
  optimal_control_update_reward_threshold: 0.001
  optimal_control_omega_z: 0.5
  optimal_control_omega_eta: 0.01
  optimal_control_finite_difference_accuracy_order: "SECOND"
  optimal_control_finite_difference_eps: 1.0e-8
  optimal_control_force_positive_semi_definite_max_tolerance: 1.0e-8
  optimal_control_force_positive_definite_max_tolerance: 1.0e-8
  optimal_control_clamp_eps: 1.0e-8

  # ---------= [Beta Distribution Parameterization] =---------
  beta_parameterization: false
  # beta_parameterization: true
  beta_online_update: false
  # beta_online_update: true
  # NOTE(review): named "list" but holds a single scalar; verify the loader
  # accepts a bare value here.
  beta_zeta_list: 10
  beta_update_reward_threshold: 1.0e-8
  beta_clamp_eps: 1.0e-8

# Display / logging toggles. All are currently off; each commented line is
# the opposite setting, kept for quick switching.
# NOTE(review): what each flag prints is not visible in this file.
display:
  display_trajectory: false
  # display_trajectory: true

  show_selected_node_depth: false
  # show_selected_node_depth: true

  show_cal_state_value: false
  # show_cal_state_value: true

  show_reward_sum_to_leaf: false
  # show_reward_sum_to_leaf: true
