{
    "env_name": "cheetah_run",
    "log_save_dir": "./log/",
    "start_onpol_iter": 0,
    "num_path_random": 1000,
    "num_path_onpol": 3,
    "env_horizon": 1000,
    "max_train_data": 10000000,
    "max_val_data": 10000000,
    "discard_ratio": 0.0,
    "dynamics":{
      "pre_training": {
        "mode": "offline",
        "itr": 0,
        "policy_itr": 25
      },
      "model": "nn",
      "ensemble": true,
      "enable_particle_ensemble": true,
      "particles": 5,
      "intrinsic_reward_only": false,
      "external_reward_evaluation_interval": 5,
      "obs_var": 1.0,
      "intrinsic_reward_coeff": 1.0,
      "ita": 1.0,
      "mode": "random",
      "val": true,
      "n_layers": 2,
      "hidden_size": 1024,
      "activation": "relu",
      "batch_size": 1000,
      "learning_rate": 1e-3,
      "epochs": 200,
      "kfac_params":{
        "learning_rate": 1e-1,
        "damping": 1e-3,
        "momentum": 0.9,
        "kl_clip": 1e-4,
        "cov_ema_decay": 0.99
      }
    },
    "policy":{
      "network_shape": [200, 200],
      "init_logstd": 0.0,
      "activation": "relu",
      "reinitialize_every_itr": false
    },
    "behavior_policy": {
      "network_shape": [200,200],
      "init_logstd": 0.0,
      "activation": "relu",
      "reinitialize_every_itr": false,
      "val": true,
      "total_train_steps": 300000,
      "batch_size": 256,
      "learning_rate": 5e-4
    },
    "trpo":{
      "gamma": 0.99,
      "iterations": 25,
      "gae": 0.95,
      "visualization": false,
      "visualize_iterations": [0]
    }
  }