
<Collect data from the pre-trained policies for Patrol: Forward, Balance, Backward>
--(front forward)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dForward-v1 --suffix patrol_wff --primitive_env Walker2dForward-v1 --primitive_path data/Walker2dForward.forward_ICLR2019
--(rear forward)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dForward-v1 --front False --suffix patrol_wfr --primitive_env Walker2dForward-v1 --primitive_path data/Walker2dForward.forward_ICLR2019
--(front balance)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dBalance-v1 --suffix patrol_bf --primitive_env Walker2dBalance-v1 --primitive_path data/Walker2dBalance.balance_ICLR2019
--(rear balance)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dBalance-v1 --front False --suffix patrol_br --primitive_env Walker2dBalance-v1 --primitive_path data/Walker2dBalance.balance_ICLR2019
--(front backward)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dBackward-v1 --suffix patrol_wbf --primitive_env Walker2dBackward-v1 --primitive_path data/Walker2dBackward.backward_ICLR2019
--(rear backward)--
python -m main --hrl False --train False --complex_task patrol --collect_exp_data True --env Walker2dBackward-v1 --front False --suffix patrol_wbr --primitive_env Walker2dBackward-v1 --primitive_path data/Walker2dBackward.backward_ICLR2019

<Train transition policy for Patrol Forward<->Balance>
--(front)--
python -m main --hrl False --train True --complex_task patrol --irl_training True --env Walker2dForward-v1 --exp_data_path_1 data/exp_demo/Walker2dForward-v1_patrol_wff --exp_data_path_2 data/exp_demo/Walker2dBalance-v1_patrol_br --env_1 patrol_forward --env_2 balance
--(rear)--
python -m main --hrl False --train True --complex_task patrol --irl_training True --env Walker2dForward-v1 --front False --exp_data_path_1 data/exp_demo/Walker2dBalance-v1_patrol_bf --exp_data_path_2 data/exp_demo/Walker2dForward-v1_patrol_wfr --env_1 patrol_balance --env_2 forward

<Train transition policy for Patrol Backward<->Balance>
--(front)--
python -m main --hrl False --train True --complex_task patrol --irl_training True --env Walker2dBackward-v1 --exp_data_path_1 data/exp_demo/Walker2dBackward-v1_patrol_wbf --exp_data_path_2 data/exp_demo/Walker2dBalance-v1_patrol_br --env_1 patrol_backward --env_2 balance
--(rear)--
python -m main --hrl False --train True --complex_task patrol --irl_training True --env Walker2dBackward-v1 --front False --exp_data_path_1 data/exp_demo/Walker2dBalance-v1_patrol_bf --exp_data_path_2 data/exp_demo/Walker2dBackward-v1_patrol_wbr --env_1 patrol_balance --env_2 backward


<Train Q network for Patrol>
python -m main --hrl True --train True --complex_task patrol --env Walker2dPatrol-v1 --pi1_env Walker2dForward-v1 --pi2_env Walker2dBackward-v1 --pi3_env Walker2dBalance-v1 --pi1 data/Walker2dForward.forward_ICLR2019 --pi2 data/Walker2dBackward.backward_ICLR2019 --pi3 data/Walker2dBalance.balance_ICLR2019 --pi13 data/transition/patrol_forward_balance/step10000000/model.pt --pi31 data/transition/patrol_balance_forward/step10000000/model.pt --pi23 data/transition/patrol_backward_balance/step10000000/model.pt --pi32 data/transition/patrol_balance_backward/step10000000/model.pt --fname patrol

<Evaluate trained networks>
python -m main --hrl True --train False --complex_task patrol --env Walker2dPatrol-v1 --pi1_env Walker2dForward-v1 --pi2_env Walker2dBackward-v1 --pi3_env Walker2dBalance-v1 --pi1 data/Walker2dForward.forward_ICLR2019 --pi2 data/Walker2dBackward.backward_ICLR2019 --pi3 data/Walker2dBalance.balance_ICLR2019 --pi13 data/transition/patrol_forward_balance/step10000000/model.pt --pi31 data/transition/patrol_balance_forward/step10000000/model.pt --pi23 data/transition/patrol_backward_balance/step10000000/model.pt --pi32 data/transition/patrol_balance_backward/step10000000/model.pt --q13 data/q_network/patrol/30000_q13.pt --q31 data/q_network/patrol/30000_q31.pt --q23 data/q_network/patrol/30000_q23.pt --q32 data/q_network/patrol/30000_q32.pt
