
<Collect data from the pre-trained policy for Pick>
--(front pick)--
python -m main --hrl False --train False --complex_task pick --collect_exp_data True --env JacoKeepPick-v1 --suffix front --primitive_env JacoPick-v1 --primitive_path data/JacoPick.pick_ICLR2019

--(rear pick)--
python -m main --hrl False --train False --complex_task pick --collect_exp_data True --env JacoPick-v1 --front False --suffix rear --primitive_env JacoPick-v1 --primitive_path data/JacoPick.pick_ICLR2019


<Train transition policy for Pick>
--(pick)--
python -m main --hrl False --train True --irl_training True --complex_task pick --env JacoPick-v1 --exp_data_path_1 data/exp_demo/JacoPick-v1_front --exp_data_path_2 data/exp_demo/JacoPick-v1_rear --env_1 pick --env_2 pick


<Train Q network for Pick>
python -m main --hrl True --train True --complex_task pick --env JacoKeepPick-v1 --pi1_env JacoPick-v1 --pi1 data/JacoPick.pick_ICLR2019 --pi12 data/transition/pick_pick/step10000000/model.pt --fname pick

<Evaluate trained networks>
python -m main --hrl True --train False --complex_task pick --env JacoKeepPick-v1 --pi1_env JacoPick-v1 --pi1 data/JacoPick.pick_ICLR2019 --pi12 data/transition/pick_pick/step10000000/model.pt --q12 data/q_network/pick/15000_q12.pt
