# walker2d experiments (env_name below is walker2d-medium-v0)
args:
  # Dataset / environment identifier.
  # NOTE(review): an earlier note described this as the d4rl "mixed" set
  # (supposedly the best for MOPO), but the name selects "medium" - confirm
  # which dataset is actually intended.
  env_name: walker2d-medium-v0

  # Model output heads (presumably: predict reward and log-variance in
  # addition to next state - verify against the model definition).
  reward_head: true
  logvar_head: true

  # Rollout settings. Semantics of each knob are inferred from the key
  # names only - TODO confirm against the training script.
  states: 'uniform'
  steps_k: 1
  num_rollouts_per_step: 50

  # Policy training cadence.
  policy_update_steps: 1000
  train_policy_every: 100

  # Data split / mixing ratios.
  train_val_ratio: 0.2
  real_sample_ratio: 0.05

  # Model training cadence and overall run length.
  model_train_freq: 1000
  max_timesteps: 10000000
  n_eval_rollouts: 10

  # Ensemble sizing (presumably 7 models trained, 5 elites kept - confirm).
  num_models: 7
  num_elites: 5

  d4rl: true
  model_retain_epochs: 5

  # MOPO settings. mopo_lam is given a value while tune_mopo_lam is also
  # enabled; how the two interact is not visible from this file.
  mopo: true
  mopo_lam: 5
  tune_mopo_lam: true
  # Alternative settings left disabled; uncomment at most one per key.
  # mopo_uncertainty_target: 1.8
  # mopo_uncertainty_target: 2.1
  # mopo_penalty_type: ensemble_var
  # mopo_penalty_type: mopo_default
  # mopo_penalty_type: ensemble_std

  # Epoch budgets.
  min_model_epochs: 350
  offline_epochs: 1000

  # Optional pre-trained model checkpoint (disabled).
  # load_model_dir: /data_mount/2021_01_28_09-07-02/checkpoints/model_saved_weights/Model_walker2d-medium-v0_seed0_2021_01_28_09-07-09
  save_policy: false

  # Buffer capacities for the training and validation sets (units are not
  # stated here - presumably number of transitions; verify).
  train_memory: 2000000
  val_memory: 500000
