# Debug configuration for the HalfCheetah experiments.
args:
  # D4RL mixed; supposedly the best for MOPO.
  env_name: halfcheetah-medium-replay-v0
  reward_head: true
  logvar_head: true
  states: 'uniform'
  steps_k: 5
  num_rollouts_per_step: 400
  model_epochs: 20
  policy_update_steps: 40
  train_policy_every: 100
  train_val_ratio: 0.2
  real_sample_ratio: 0.05
  model_update_freq: 250
  max_timesteps: 10000000  # 10 million
  n_eval_rollouts: 1
  num_models: 7
  num_elites: 5
  espi: true
  # Disabled (commented out):
  # comment: 'debug'
  d4rl: true
  model_retain_epochs: 20
  # Disabled (commented out):
  # train_memory: 1000000
  # val_memory: 250000
