#!/bin/bash

# Usage: <this script> [gpu_id]
#
# Restricts CUDA to a single device and then runs main.py with the
# "test" task.

# GPU index used when the first positional argument is missing or empty.
DEFAULT_GPU_ID=0

# Resolve the GPU index: first argument if given, otherwise the default.
gpu_id="${1:-${DEFAULT_GPU_ID}}"

# Expose the chosen index to child processes through the environment.
CUDA_VISIBLE_DEVICES="${gpu_id}"
export CUDA_VISIBLE_DEVICES

# Run the entry point with its two key=value arguments.
python main.py vae_decode_batch_size=1 task=test

# DEFAULT_NFE=30
# DEFAULT_EXPLORATION_COEF=3.0

# default_nfe=${2:-$DEFAULT_NFE}
# exploration_coef=${3:-$DEFAULT_EXPLORATION_COEF}

# # 15-step SDXL (NOTE: the command below sets num_inference_step=5)
# # color_channel_reward
# # exploration_coef = 3.0
# # latent_reward, immediate_posterior_mean
# # max_reward
# # average
# # beta, value_gradient, 10
# # back_propagation
# # 0.15, 0.5
# # NFE 150
# # best_merged_reward_list: 
# # best_final_reward_list: 
# # python main.py \
# #     pipeline=sdxl \
# #     task=search/run_optimal_control_mcts/sdxl/template \
# #     task.prompt_list.num_prompt=2 \
# #     task.sample.num_inference_step=5 \
# #     task.task.num_sample_per_prompt=2 \
# #     task.reward_model.reward_model_type="color_channel_reward" \
# #     task.reward_model.cal_dynamics_batch_size=20 \
# #     task.reward_model.cal_intermediate_reward_batch_size=20 \
# #     task.reward_model.cal_final_reward_batch_size=20 \
# #     task.reward_model.reward_shaping_policy="latent_reward" \
# #     task.reward_model.cal_intermediate_reward_policy="immediate_posterior_mean" \
# #     task.mcts.mode.mdp_modeling="max_reward" \
# #     task.mcts.mode.value_policy="average" \
# #     task.mcts.mode.pseudo_latent_as_final=True \
# #     task.mcts.ucb.exploration_coef=${exploration_coef} \
# #     task.mcts.selection.selection_depth_lim=4 \
# #     task.mcts.expansion.expansion_action_sampling_policy="beta" \
# #     task.mcts.nfe_limit.nfe_cal_dynamics_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_final_reward_lim=${default_nfe} \
# #     task.mcts.beta.update_policy="value_gradient" \
# #     task.mcts.beta.value_gradient_update_time="back_propagation" \
# #     task.mcts.beta.update_step_size=0.15 \
# #     task.mcts.beta.max_update_bias=0.5 \
# #     task.mcts.beta.zeta_list=10 \


# # 15-step SDXL
# # clip_score
# # exploration_coef = 3.0
# # latent_reward, immediate_posterior_mean
# # max_reward
# # average
# # beta, value_gradient, 10
# # back_propagation
# # 0.15, 0.5
# # NFE 150
# # best_merged_reward_list: 
# # best_final_reward_list: 
# # python main.py \
# #     pipeline=sdxl \
# #     task=search/run_optimal_control_mcts/sdxl/template \
# #     task.prompt_list.num_prompt=5 \
# #     task.sample.num_inference_step=15 \
# #     task.task.num_sample_per_prompt=2 \
# #     task.reward_model.reward_model_type="clip_score" \
# #     task.reward_model.cal_dynamics_batch_size=20 \
# #     task.reward_model.cal_intermediate_reward_batch_size=20 \
# #     task.reward_model.cal_final_reward_batch_size=20 \
# #     task.reward_model.reward_shaping_policy="latent_reward" \
# #     task.reward_model.cal_intermediate_reward_policy="immediate_posterior_mean" \
# #     task.mcts.mode.mdp_modeling="max_reward" \
# #     task.mcts.mode.value_policy="average" \
# #     task.mcts.mode.pseudo_latent_as_final=True \
# #     task.mcts.ucb.exploration_coef=${exploration_coef} \
# #     task.mcts.selection.selection_depth_lim=14 \
# #     task.mcts.expansion.expansion_action_sampling_policy="beta" \
# #     task.mcts.nfe_limit.nfe_cal_dynamics_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_final_reward_lim=${default_nfe} \
# #     task.mcts.beta.update_policy="value_gradient" \
# #     task.mcts.beta.value_gradient_update_time="back_propagation" \
# #     task.mcts.beta.update_step_size=0.15 \
# #     task.mcts.beta.max_update_bias=0.5 \
# #     task.mcts.beta.zeta_list=10 \


# # test only
# # python main.py \
# #     pipeline=sd_v1_4 \
# #     task=search/run_optimal_control_mcts/sd_v1_4/template \
# #     task.prompt_list.num_prompt=2 \
# #     task.sample.num_inference_step=5 \
# #     task.task.num_sample_per_prompt=2 \
# #     task.reward_model.reward_model_type="clip_score" \
# #     task.reward_model.cal_dynamics_batch_size=20 \
# #     task.reward_model.cal_intermediate_reward_batch_size=20 \
# #     task.reward_model.cal_final_reward_batch_size=20 \
# #     task.reward_model.reward_shaping_policy="latent_reward" \
# #     task.reward_model.cal_intermediate_reward_policy="immediate_posterior_mean" \
# #     task.mcts.mode.mdp_modeling="max_reward" \
# #     task.mcts.mode.value_policy="average" \
# #     task.mcts.mode.pseudo_latent_as_final=True \
# #     task.mcts.ucb.exploration_coef=${exploration_coef} \
# #     task.mcts.selection.selection_depth_lim=4 \
# #     task.mcts.expansion.expansion_action_sampling_policy="beta" \
# #     task.mcts.nfe_limit.nfe_cal_dynamics_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_final_reward_lim=${default_nfe} \
# #     task.mcts.beta.update_policy="value_gradient" \
# #     task.mcts.beta.value_gradient_update_time="back_propagation" \
# #     task.mcts.beta.update_step_size=0.15 \
# #     task.mcts.beta.max_update_bias=0.5 \
# #     task.mcts.beta.zeta_list=10 \


# # 15-step SDXL (NOTE: the command below sets num_inference_step=5 with selection_depth_lim=14)
# # clip_score
# # exploration_coef = 3.0
# # latent_reward, immediate_posterior_mean
# # max_reward
# # average
# # beta, value_gradient, 10
# # back_propagation
# # 0.15, 0.5
# # NFE 150
# # best_merged_reward_list: 
# # best_final_reward_list: 
# # python main.py \
# #     pipeline=sdxl \
# #     task=search/run_optimal_control_mcts/sdxl/template \
# #     task.prompt_list.num_prompt=5 \
# #     task.sample.num_inference_step=5 \
# #     task.task.num_sample_per_prompt=2 \
# #     task.reward_model.reward_model_type="clip_score" \
# #     task.reward_model.cal_dynamics_batch_size=20 \
# #     task.reward_model.cal_intermediate_reward_batch_size=20 \
# #     task.reward_model.cal_final_reward_batch_size=20 \
# #     task.reward_model.reward_shaping_policy="latent_reward" \
# #     task.reward_model.cal_intermediate_reward_policy="immediate_posterior_mean" \
# #     task.mcts.mode.mdp_modeling="max_reward" \
# #     task.mcts.mode.value_policy="average" \
# #     task.mcts.mode.pseudo_latent_as_final=True \
# #     task.mcts.ucb.exploration_coef=${exploration_coef} \
# #     task.mcts.selection.selection_depth_lim=14 \
# #     task.mcts.expansion.expansion_action_sampling_policy="beta" \
# #     task.mcts.nfe_limit.nfe_cal_dynamics_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_final_reward_lim=${default_nfe} \
# #     task.mcts.beta.update_policy="value_gradient" \
# #     task.mcts.beta.value_gradient_update_time="back_propagation" \
# #     task.mcts.beta.update_step_size=0.15 \
# #     task.mcts.beta.max_update_bias=0.5 \
# #     task.mcts.beta.zeta_list=10 \




# # 15-step SD v1.4 (NOTE: the command below sets num_inference_step=5)
# # clip_score
# # exploration_coef = 3.0
# # latent_reward, immediate_posterior_mean
# # max_reward
# # average
# # beta, value_gradient, 10
# # back_propagation
# # 0.15, 0.5
# # NFE 150
# # best_merged_reward_list: 
# # best_final_reward_list: 
# # python main.py \
# #     pipeline=sd_v1_4 \
# #     task=search/run_optimal_control_mcts/sd_v1_4/template \
# #     task.prompt_list.num_prompt=5 \
# #     task.sample.num_inference_step=5 \
# #     task.task.num_sample_per_prompt=2 \
# #     task.reward_model.reward_model_type="clip_score" \
# #     task.reward_model.cal_dynamics_batch_size=20 \
# #     task.reward_model.cal_intermediate_reward_batch_size=20 \
# #     task.reward_model.cal_final_reward_batch_size=20 \
# #     task.reward_model.reward_shaping_policy="latent_reward" \
# #     task.reward_model.cal_intermediate_reward_policy="immediate_posterior_mean" \
# #     task.mcts.mode.mdp_modeling="max_reward" \
# #     task.mcts.mode.value_policy="average" \
# #     task.mcts.mode.pseudo_latent_as_final=True \
# #     task.mcts.ucb.exploration_coef=${exploration_coef} \
# #     task.mcts.selection.selection_depth_lim=4 \
# #     task.mcts.expansion.expansion_action_sampling_policy="beta" \
# #     task.mcts.nfe_limit.nfe_cal_dynamics_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_intermediate_reward_lim=${default_nfe} \
# #     task.mcts.nfe_limit.nfe_cal_final_reward_lim=${default_nfe} \
# #     task.mcts.beta.update_policy="value_gradient" \
# #     task.mcts.beta.value_gradient_update_time="back_propagation" \
# #     task.mcts.beta.update_step_size=0.15 \
# #     task.mcts.beta.max_update_bias=0.5 \
# #     task.mcts.beta.zeta_list=10 \



# ./script/cal_metric/hps_v2/sd_v1_4/HumanPreferenceDataset_v2/baseline.sh ${gpu_id} 999


