# Improving Online Reinforcement Learning via Behavior Prior Distillation

# Train B2PD on MuJoCo environments
$ python main.py --env=HalfCheetah-v3   --device=cuda:0  --seed=0  --xi=0.1  --eta=1.0 --H=10 --warmup_timestamps=4000  --policy_freq=1 --start_timesteps=10000 --policy=B2PD

# Train B2PD on PyBullet environments
$ python main_pybullet.py   --device=cuda:0  --seed=0   --xi=0.1 --eta=1.0 --H=10 --warmup_timestamps=4000   --policy_freq=1 --start_timesteps=10000 --policy=B2PD

# Train B2PD on state-based DMC environments
$ python main_dmc.py   --device=cuda:0  --seed=0  --eta=1.0 --H=10 --warmup_timestamps=4000   --policy_freq=1 --start_timesteps=10000 --policy=B2PD


# Train ALH on MuJoCo environments
$ python main.py --env=HalfCheetah-v3   --device=cuda:0  --seed=0  --start_timesteps=10000 --policy=ALH 

# Train TD3 on MuJoCo environments
$ python main.py --env=HalfCheetah-v3   --device=cuda:0  --seed=0  --policy=TD3

# Train DDPG on MuJoCo environments
$ python main.py --env=HalfCheetah-v3   --device=cuda:0  --seed=0  --policy=DDPG