#!/usr/bin/env bash
#
# Launch the PROPS fixed-target experiment sweep: for each MuJoCo v4
# environment, run ppo_props_continuous.py once with random sampling and once
# with an expert policy (expert runs additionally load a pretrained policy and
# its normalization statistics from policies/<env>/).
#
# Fix vs. the original command list: '--props-lr' was misspelled '-props-lr'
# (single dash) on every line; argparse does not recognize single-dash long
# options, so the PROPS learning rate was never actually applied.
#
# NOTE: 'set -e' is deliberately omitted so one failed run does not abort the
# rest of the sweep (matches the original script's behavior); failures are
# reported on stderr instead.
set -uo pipefail

# Per-environment PROPS hyperparameters taken verbatim from the original
# command list (learning rate and regularization coefficient lambda).
declare -A props_lr=(
  [Ant-v4]=0.0001
  [Humanoid-v4]=0.0001
  [Walker2d-v4]=0.001
  [Hopper-v4]=0.001
  [HalfCheetah-v4]=0.001
  [Swimmer-v4]=0.0001
)
declare -A props_lambda=(
  [Ant-v4]=0.3
  [Humanoid-v4]=0.3
  [Walker2d-v4]=0.1
  [Hopper-v4]=0.1
  [HalfCheetah-v4]=0.1
  [Swimmer-v4]=0.1
)

# Environment order matches the original file so logs/results line up.
readonly -a envs=(Ant-v4 Humanoid-v4 Walker2d-v4 Hopper-v4 HalfCheetah-v4 Swimmer-v4)

#######################################
# Run one PROPS experiment.
# Globals:   props_lr, props_lambda (read)
# Arguments: $1 - environment id (e.g. Ant-v4)
#            $2 - sampling strategy: 'random' or 'expert'
# Returns:   exit status of the python run
#######################################
run_experiment() {
  local env_id=$1 strategy=$2
  # Build the command as an array — never string-concatenate command lines.
  local -a cmd=(
    python ppo_props_continuous.py
    --env-id "$env_id"
    -f results_fixed_target
    -s "$strategy"
    --total-timesteps 32768
    --eval-freq 1
    --eval-episodes 0
    -b 16
    --num-steps 1024
    --update-epochs 0
    --props-num-steps 256
    --props-lr "${props_lr[$env_id]}"
    --props-target-kl 0.05
    --se 1
    --se-freq 1
    --se-lr 1e-3
    --se-epochs 1000
    --props-lambda "${props_lambda[$env_id]}"
    --se-ref 0
  )
  # Expert runs load a pretrained policy plus its observation-normalization
  # statistics; random runs use neither.
  if [[ "$strategy" == "expert" ]]; then
    cmd+=(
      --policy-path "policies/${env_id}/best_model.zip"
      --normalization-dir "policies/${env_id}"
    )
  fi
  "${cmd[@]}"
}

main() {
  local strategy env_id
  # Original ordering: all 'random' runs first, then all 'expert' runs.
  for strategy in random expert; do
    for env_id in "${envs[@]}"; do
      run_experiment "$env_id" "$strategy" \
        || printf 'warning: run failed (env=%s strategy=%s)\n' \
             "$env_id" "$strategy" >&2
    done
  done
}

main "$@"
