
<Collect data from the pre-trained primitive policies for Serve>
--(front toss)--
python -m main --hrl False --train False --complex_task serve --collect_exp_data True --env JacoServe-v1 --suffix front --primitive_env JacoToss-v1 --primitive_path data/JacoToss.toss_ICLR2019
--(rear hit)--
python -m main --hrl False --train False --complex_task serve --collect_exp_data True --env JacoHit-v1 --front False --suffix rear --primitive_env JacoHit-v1 --primitive_path data/JacoHit.hit_ICLR2019


<Train transition policy for Serve>
--(serve)--
python -m main --hrl False --train True --irl_training True --complex_task serve --env JacoServe-v1 --exp_data_path_1 data/exp_demo/JacoToss-v1_front --exp_data_path_2 data/exp_demo/JacoHit-v1_rear --env_1 toss --env_2 hit


<Train Q network for Serve>
python -m main --hrl True --train True --complex_task serve --env JacoServe-v1 --pi1_env JacoToss-v1 --pi2_env JacoHit-v1 --pi1 data/JacoToss.toss_ICLR2019 --pi2 data/JacoHit.hit_ICLR2019 --pi12 data/transition/toss_hit/step10000000/model.pt --fname serve

<Evaluate trained networks>
python -m main --hrl True --train False --complex_task serve --env JacoServe-v1 --pi1_env JacoToss-v1 --pi2_env JacoHit-v1 --pi1 data/JacoToss.toss_ICLR2019 --pi2 data/JacoHit.hit_ICLR2019 --pi12 data/transition/toss_hit/step10000000/model.pt --q12 data/q_network/serve/25000_q12.pt

