## Pretrain the dynamics ensemble

# Pretrain the 30-member dynamics ensemble for one (environment, dataset) pair.
# Policy-evaluation and BC-prior training are switched off for this stage.
#
# Arguments:
#   $1 - value passed as env= (halfcheetah | hopper | walker2d)
#   $2 - D4RL dataset name without the "-v2" suffix (e.g. medium-expert)
#   $3 - CUDA device index
#   $4 - random seed (optional, default 2)
#
# Checkpoints are written to data/pretrain/<env dir>/<dataset>/seed_<seed>.
# Every run below uses seed 2: the CBOP commands in this file pass
# cache_dir=data/pretrain/<env dir>/<dataset>/seed_2, which is presumably where
# they look for these checkpoints. The walker2d/random run previously used
# seed 0 / seed_0, a directory the matching CBOP command never references.
pretrain_dynamics() {
  local env_name=$1 dataset=$2 gpu=$3
  local seed=${4:-2}
  # Checkpoint directories are named "walker" for env=walker2d; the other
  # environment names are used unchanged.
  local env_dir=${env_name%2d}

  python -m rlkit.examples.main \
    algorithm=pretrain \
    env="${env_name}" \
    overrides.d4rl_config="${dataset}-v2" \
    is_offline=true \
    log_to_wandb=false \
    overrides.policy_eval.train=false \
    overrides.bc_prior.train=false \
    overrides.dynamics.train=true \
    save_dir="data/pretrain/${env_dir}/${dataset}/seed_${seed}" \
    overrides.dynamics.batch_size=1024 \
    overrides.dynamics.ensemble_model.ensemble_size=30 \
    seed="${seed}" \
    device="cuda:${gpu}"
}

#                 env         dataset        gpu
pretrain_dynamics halfcheetah medium-expert  0
pretrain_dynamics halfcheetah medium-replay  1
pretrain_dynamics halfcheetah medium         1
pretrain_dynamics halfcheetah random         2

pretrain_dynamics hopper      medium-expert  2
pretrain_dynamics hopper      medium-replay  3
pretrain_dynamics hopper      medium         4
pretrain_dynamics hopper      random         4

pretrain_dynamics walker2d    medium-expert  5
pretrain_dynamics walker2d    medium-replay  6
pretrain_dynamics walker2d    medium         6
pretrain_dynamics walker2d    random         7


## Pretrain the policy and the Q ensemble with behavior cloning (BC) and policy evaluation (PE), respectively

# Pretrain the BC prior and the policy-evaluation Q ensemble for one
# (environment, dataset) pair. Dynamics training is switched off for this
# stage; the run lasts 100 epochs in total.
#
# Arguments:
#   $1 - value passed as env= (halfcheetah | hopper | walker2d)
#   $2 - D4RL dataset name without the "-v2" suffix (e.g. medium-expert)
#   $3 - CUDA device index
#   $4 - random seed (optional, default 2)
#
# Checkpoints are written to data/pretrain/<env dir>/<dataset>/seed_<seed>.
# Every run below uses seed 2: the CBOP commands in this file pass
# cache_dir=data/pretrain/<env dir>/<dataset>/seed_2, which is presumably where
# they look for these checkpoints. The walker2d medium and random runs
# previously used seed 0 / seed_0, a directory the matching CBOP commands
# never reference.
pretrain_bc_pe() {
  local env_name=$1 dataset=$2 gpu=$3
  local seed=${4:-2}
  # Checkpoint directories are named "walker" for env=walker2d; the other
  # environment names are used unchanged.
  local env_dir=${env_name%2d}

  python -m rlkit.examples.main \
    algorithm=pretrain \
    env="${env_name}" \
    overrides.d4rl_config="${dataset}-v2" \
    is_offline=true \
    log_to_wandb=false \
    overrides.policy_eval.train=true \
    overrides.bc_prior.train=true \
    overrides.dynamics.train=false \
    save_dir="data/pretrain/${env_dir}/${dataset}/seed_${seed}" \
    overrides.bc_prior.batch_size=1024 \
    overrides.policy_eval.batch_size=1024 \
    overrides.policy_eval.num_value_learning_repeat=5 \
    algorithm.algorithm_cfg.num_total_epochs=100 \
    seed="${seed}" \
    device="cuda:${gpu}"
}

#              env         dataset        gpu
pretrain_bc_pe halfcheetah medium-expert  0
pretrain_bc_pe halfcheetah medium-replay  1
pretrain_bc_pe halfcheetah medium         1
pretrain_bc_pe halfcheetah random         2

pretrain_bc_pe hopper      medium-expert  2
pretrain_bc_pe hopper      medium-replay  3
pretrain_bc_pe hopper      medium         4
pretrain_bc_pe hopper      random         4

pretrain_bc_pe walker2d    medium-expert  5
pretrain_bc_pe walker2d    medium-replay  6
pretrain_bc_pe walker2d    medium         7
pretrain_bc_pe walker2d    random         7


## Reproduce CBOP results

# Launch one CBOP (algorithm=mvepo) training run with seed 2.
#
# Arguments:
#   $1 - value passed as env= (halfcheetah | hopper | walker2d)
#   $2 - D4RL dataset name without the "-v2" suffix (e.g. medium-expert)
#   $3 - number of Q-functions (overrides.trainer_cfg.num_qfs)
#   $4 - CUDA device index
#
# cache_dir points at data/pretrain/<env dir>/<dataset>/seed_2, the same path
# layout the pretraining commands use for save_dir.
run_cbop() {
  local task=$1 data_name=$2 q_count=$3 cuda_idx=$4
  # Pretraining directories are named "walker" for env=walker2d; the other
  # environment names are used unchanged.
  local task_dir=${task%2d}

  python -m rlkit.examples.main \
    env="${task}" \
    overrides.d4rl_config="${data_name}-v2" \
    algorithm=mvepo \
    is_offline=true \
    log_to_wandb=false \
    overrides.trainer_cfg.horizon=10 \
    overrides.trainer_cfg.num_qfs="${q_count}" \
    algorithm.algorithm_cfg.num_epochs=1000 \
    overrides.trainer_cfg.lcb_coeff=3. \
    overrides.trainer_cfg.lr=3e-4 \
    overrides.dynamics.ensemble_model.ensemble_size=30 \
    overrides.dynamics.num_elites=20 \
    cache_dir="data/pretrain/${task_dir}/${data_name}/seed_2" \
    overrides.trainer_cfg.sampling_method=min \
    overrides.offline_cfg.checkpoint_type=behavior \
    overrides.trainer_cfg.indep_sampling=false \
    snapshot_mode=gap_and_last \
    algorithm.algorithm_cfg.save_snapshot_gap=30 \
    overrides.trainer_cfg.eta=1 \
    seed=2 \
    device="cuda:${cuda_idx}"
}

#        env         dataset        num_qfs gpu
run_cbop halfcheetah medium-expert  20      0
run_cbop halfcheetah medium-replay  20      1
run_cbop halfcheetah medium         20      1
run_cbop halfcheetah random         20      2

run_cbop hopper      medium-expert  50      2
run_cbop hopper      medium-replay  50      3
run_cbop hopper      medium         50      4
run_cbop hopper      random         50      4

run_cbop walker2d    medium-expert  20      5
run_cbop walker2d    medium-replay  20      6
run_cbop walker2d    medium         20      6
run_cbop walker2d    random         20      7
