# Runs main.py with pipeline=sd_v1_4 and the
# search/run_optimal_control_mcts_eps/sd_v1_4/template task on a single GPU.
#
# Usage: <this script> [gpu_id] [eps_seed_st] [eps_seed_ed]
#   $1  exported as CUDA_VISIBLE_DEVICES    (default: DEFAULT_GPU_ID = 0)
#   $2  passed as task.eps.seed_st          (default: EPS_SEED_ST   = 3072)
#   $3  passed as task.eps.seed_ed          (default: EPS_SEED_ED   = 4095)
# An empty argument is treated like a missing one (":-" expansion).

# defaults
DEFAULT_GPU_ID=0

# GPU ID
gpu_id=${1:-$DEFAULT_GPU_ID}
export CUDA_VISIBLE_DEVICES="${gpu_id}"

# Alternative start values noted by the author: [3072, 4096, 5120]
# NOTE(review): presumably the starts of consecutive 1024-seed shards — confirm.
EPS_SEED_ST=3072
eps_seed_st=${2:-$EPS_SEED_ST}

# Alternative end values noted by the author: [4095, 5119, 6143]
# NOTE(review): whether the end seed is inclusive is decided in main.py — confirm.
EPS_SEED_ED=4095
eps_seed_ed=${3:-$EPS_SEED_ED}


# Every override is passed as a single quoted word:
#   * values built from positional parameters must not be word-split or globbed;
#   * "[1.0]" is a glob bracket expression when unquoted and could be replaced
#     by a matching file name (or dropped/rejected under nullglob/failglob).
# The last argument carries no trailing backslash so that text appended after
# this command can never be absorbed into the python argument list.
# NOTE(review): the key=value arguments look like Hydra-style overrides — confirm.
python main.py \
    "exp_name.default_exp_name=mcts_eps_999" \
    "vae_decode_batch_size=1" \
    "seed=42" \
    "pipeline=sd_v1_4" \
    "task=search/run_optimal_control_mcts_eps/sd_v1_4/template" \
    "task.init_latent.seed_list=0" \
    "task.eps.random=True" \
    "task.eps.seed_st=${eps_seed_st}" \
    "task.eps.seed_ed=${eps_seed_ed}" \
    "task.eta.random=False" \
    "task.eta.default_eta=[1.0]" \
    "task.prompt_list.num_prompt=50" \
    "task.prompt_list.prompt_manager_dict.prompt_manager_type=HumanPreferenceDataset_v2" \
    "task.prompt_list.prompt_manager_dict.cfg_yaml_path=./config/dataset/hpd_v2_100.yaml" \
    "task.task.num_sample_per_prompt=2" \
    "task.sample.height=512" \
    "task.sample.width=512" \
    "task.sample.num_inference_step=15" \
    "task.reward_model.reward_model_type=hps_v2" \
    "task.reward_model.cal_dynamics_batch_size=20" \
    "task.reward_model.cal_intermediate_reward_batch_size=20" \
    "task.reward_model.cal_final_reward_batch_size=20" \
    "task.reward_model.reward_shaping_policy=latent_reward" \
    "task.mcts.mode.mdp_modeling=max_reward" \
    "task.mcts.mode.enable_pseudo_latent_as_final_depth=5" \
    "task.mcts.mode.value_policy=max" \
    "task.mcts.mode.pseudo_latent_as_final=True" \
    "task.mcts.ucb.exploration_coef=2.0" \
    "task.mcts.selection.selection_depth_lim=12" \
    "task.mcts.expansion.expansion_action_sampling_policy=uniform" \
    "task.mcts.nfe_limit.nfe_cal_dynamics_lim=999" \
    "task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=1e9" \
    "task.mcts.nfe_limit.nfe_cal_final_reward_lim=1e9"
