{
    "env_name": "walker-rand-params",
    "n_train_tasks": 40,
    "n_eval_tasks": 10,
    "env_params": {
        "n_tasks": 50
    },
    "latent_size": 4,
    "algo_params": {
        "num_iterations": 5000,
        "num_steps_prior": 128,
        "num_steps_posterior": 128,
        "num_extra_rl_steps_posterior": 128,
        "meta_batch": 10,
        "num_evals": 1,
        "num_train_steps_per_itr": 1000,
        "num_steps_per_eval": 256,
        "embedding_batch_size": 256,
        "embedding_mini_batch_size": 256,
        "batch_size": 256,
        "max_path_length": 64,
        "meta_episode_len": 3,
        "kl_lambda": 1e-0,
        "dump_eval_paths": 0,
        "snail": 1,
        "num_trajs": 3,
        "num_trajs_init": 3,
        "policy_lr": 3e-4,
        "qf_lr": 3e-4,
        "vf_lr": 3e-4,
        "context_lr": 3e-4,
        "gradient_from_Q": 1,
        "prediction": 1,
        "prediction_reward_scale": 1,
        "prediction_transition_scale": 1,
        "reward_scale": 10,
        "eval_deterministic": 1,
        "sparse_rewards": 0,

        "last_reward_only": 0,
        "intrinsic_reward_weight": 0.5,
        "use_kl_div_intrinsic": 0,
        "use_per": 0,
        "alpha": 1,
        "beta": 1,
        "reward_bias": 1e-1,
        "reward_decay": 0.6,
        "rsample_rate": 4,
        "consider_dynamics": 1,
        "intrinsic_reward_decay": 1
    },
    "util_params": {
        "base_log_dir": "outputmetacure"
    }
}
