
<Collect data of pre-trained policy for Catch>
--(front catch)--
python -m main --hrl False --train False --complex_task catch --collect_exp_data True --env JacoKeepCatch-v1 --suffix front --primitive_env JacoCatch-v1 --primitive_path data/JacoCatch.catch_ICLR2019

--(rear catch)--
python -m main --hrl False --train False --complex_task catch --collect_exp_data True --env JacoCatch-v1 --front False --suffix rear --primitive_env JacoCatch-v1 --primitive_path data/JacoCatch.catch_ICLR2019


<Train transition policy for Catch>
--(catch)--
python -m main --hrl False --train True --irl_training True --complex_task catch --env JacoCatch-v1 --exp_data_path_1 data/exp_demo/JacoCatch-v1_front --exp_data_path_2 data/exp_demo/JacoCatch-v1_rear --env_1 catch --env_2 catch


<Train Q network for Catch>
python -m main --hrl True --train True --complex_task catch --env JacoKeepCatch-v1 --pi1_env JacoCatch-v1 --pi1 data/JacoCatch.catch_ICLR2019 --pi12 data/transition/catch_catch/step10000000/model.pt --fname catch

<Evaluate trained networks>
python -m main --hrl True --train False --complex_task catch --env JacoKeepCatch-v1 --pi1_env JacoCatch-v1 --pi1 data/JacoCatch.catch_ICLR2019 --pi12 data/transition/catch_catch/step10000000/model.pt --q12 data/q_network/catch/20000_q12.pt

