# BO experiments
# Experiment arguments. Every setting is a key of the single `args` mapping.
# Lines of the form `# key: value` are disabled settings kept for reference;
# they are aligned with their siblings so they can be re-enabled by removing
# the leading `# ` only.
args:
    env_name: hopper-medium-expert-v0
    reward_head: true
    logvar_head: true
    states: 'uniform'
    # steps_k: 5
    num_rollouts_per_step: 50
    policy_update_steps: 1000
    train_policy_every: 100
    train_val_ratio: 0.2
    real_sample_ratio: 0.05
    model_train_freq: 1000
    max_timesteps: 10000000
    n_eval_rollouts: 10
    # NOTE(review): num_models is disabled while num_elites is set to 5, so
    # the ensemble size presumably falls back to the consumer's default --
    # confirm that default is at least num_elites.
    # num_models: 7
    num_elites: 5
    d4rl: true
    model_retain_epochs: 5
    mopo: true
    # mopo_lam: 5
    # offline_epochs: 1000

    # Saved model weights. The paths are absolute, so they only resolve on a
    # host where /Meta-Offline-RL is laid out this way.
    # NOTE(review): load_model_dir is the seed100 checkpoint, i.e. the same
    # directory as the first disabled entry of ensemble_replace_model_dirs.
    load_model_dir: /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed100_2021_04_08_01-34-22
    # Active entries: seeds 400, 500 and 600. Seeds 100-300 are disabled.
    ensemble_replace_model_dirs:
        # - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed100_2021_04_08_01-34-22
        # - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed200_2021_04_08_01-34-26
        # - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed300_2021_04_08_01-34-26
        - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed400_2021_04_08_01-34-27
        - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed500_2021_04_08_01-34-27
        - /Meta-Offline-RL/checkpoints/model_saved_weights/Model_hopper-medium-expert-v0_seed600_2021_04_08_01-34-30
    augment_offline_data: false
    # save_policy: True

    # val_memory / (train_memory + val_memory) = 0.2, which equals
    # train_val_ratio above -- presumably intentional; TODO confirm and keep
    # the three values in sync if any of them changes.
    train_memory: 5000000
    val_memory: 1250000
