# Launch train_iq.py with the SAC agent for five environments, one after
# another: ant, cheetah, hopper, humanoid, walker.
#
# run_iq ENV DEMOS LOSS DEMO_FILE SUBSAMPLE_FREQ
#   $1 - value for env=                    (environment config name)
#   $2 - value for expert.demos=
#   $3 - value for method.loss=
#   $4 - value for env.demo=               (demonstration pickle file name)
#   $5 - value for expert.subsample_freq=
# Returns the exit status of the python process.
run_iq() {
  python train_iq.py \
    "env=$1" \
    agent=sac \
    "expert.demos=$2" \
    "method.loss=$3" \
    "env.demo=$4" \
    "expert.subsample_freq=$5"
}

# ENV      DEMOS LOSS  DEMO_FILE                SUBSAMPLE_FREQ
run_iq ant      1 value Ant-v2_d4rl.pkl         1

run_iq cheetah  1 value HalfCheetah-v2_d4rl.pkl 1

run_iq hopper   5 v0    Hopper-v2_d4rl.pkl      5

run_iq humanoid 5 v0    Humanoid-v2_d4rl.pkl    5

run_iq walker   1 value Walker2d-v2_d4rl.pkl    1