<div align="center">

<div id="user-content-toc" style="margin-bottom: 50px">
  <ul align="center" style="list-style: none;">
    <summary>
      <h1 style="font-size:1.76rem">
        Value Flows
      </h1>
    </summary>
  </ul>
</div>

</div>

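This repository contains the supplementary code for Value Flows. It is built on top of the [OGBench](https://github.com/seohongpark/ogbench) repository. The commands to run the code are listed below. In these commands, `{task1, task2, task3, task4, task5}` is a placeholder: replace it with a single task name (e.g., `task1`) before running.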

### Value Flows hyperparameters

<details>
<summary><b>Click to expand the full list of commands</b></summary>

```
# Value Flows on cube-double-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=cube-double-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.995 --agent.alpha_critic_td_vf=1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=mean --seed=10

# Value Flows on puzzle-3x3-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=puzzle-3x3-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=2 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3  --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=mean --seed=10

# Value Flows on scene-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=scene-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3  --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=mean --seed=10

# Value Flows on puzzle-4x4-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=puzzle-4x4-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=100 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10

# Value Flows on cube-triple-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=cube-triple-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.995 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.03 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=mean --seed=10

# Value Flows on visual-antmaze-medium-navigate-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=visual-antmaze-medium-navigate-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.03 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=mean --seed=10

# Value Flows on visual-antmaze-teleport-navigate-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=visual-antmaze-teleport-navigate-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.03 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=mean --seed=10

# Value Flows on visual-cube-double-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=visual-cube-double-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.995 --agent.alpha_critic_td_vf=1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=mean --seed=10

# Value Flows on visual-scene-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=visual-scene-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=mean --seed=10

# Value Flows on visual-puzzle-3x3-play-singletask-{task1, task2, task3, task4, task5}-v0
python main.py --env_name=visual-puzzle-3x3-play-singletask-{task1, task2, task3, task4, task5}-v0 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=mean --seed=10

# Value Flows on pen-{human, cloned, expert}-v1
python main.py --env_name=pen-human-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=1.0 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10
python main.py --env_name=pen-cloned-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=1.0 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10
python main.py --env_name=pen-expert-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.01 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10

# Value Flows on door-{human, cloned, expert}-v1
python main.py --env_name=door-human-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.01 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10
python main.py --env_name=door-cloned-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10
python main.py --env_name=door-expert-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10


# Value Flows on hammer-{human, cloned, expert}-v1
python main.py --env_name=hammer-human-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10
python main.py --env_name=hammer-cloned-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.3 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10
python main.py --env_name=hammer-expert-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=1.0 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10


# Value Flows on relocate-{human, cloned, expert}-v1
python main.py --env_name=relocate-human-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.1 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.01 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10
python main.py --env_name=relocate-cloned-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.01 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=mean --agent.q_agg=min --seed=10
python main.py --env_name=relocate-expert-v1 --agent=agents/value_flows.py --agent.discount=0.99 --agent.alpha_critic_td_vf=0.3 --agent.critic_loss_type=q-learning --next_action_extraction=sfbc --policy_extraction=sfbc --agent.ensemble_weight_type=target_ret_std_jac_est --agent.ensemble_weight_temp=0.1 --agent.clip_flow_actions=True --agent.value_layer_norm=True --agent.actor_layer_norm=True --agent.ret_agg=min --agent.q_agg=min --seed=10

```

</details>
