{
  "algorithm_kwargs": {
    "demo_batch_size": 32,
    "gen_replay_buffer_capacity": 16384,
    "n_disc_updates_per_round": 8
  },
  "checkpoint_interval": 20,
  "demonstrations": {
    "rollout_type": "ppo-huggingface",
    "n_expert_demos": null
  },
  "reward": {
    "add_std_alpha": null,
    "ensemble_size": null,
    "net_cls": {
      "py/type": "imitation.rewards.reward_nets.BasicRewardNet"
    },
    "net_kwargs": {
      "normalize_input_layer": {
        "py/type": "imitation.util.networks.RunningNorm"
      }
    },
    "normalize_output_layer": {
      "py/type": "imitation.util.networks.RunningNorm"
    }
  },
  "rl": {
    "batch_size": 16384,
    "rl_cls": {
      "py/type": "stable_baselines3.ppo.ppo.PPO"
    },
    "rl_kwargs": {
      "batch_size": 16,
      "clip_range": 0.3,
      "ent_coef": 0.008871887607426377,
      "gae_lambda": 0.8,
      "gamma": 0.995,
      "learning_rate": 2.428297806883194e-5,
      "max_grad_norm": 0.9,
      "n_epochs": 10,
      "vf_coef": 0.4351450387648799
    }
  },
  "total_timesteps": 10000000,
  "policy": {
    "policy_cls": {
      "py/type": "imitation.policies.base.FeedForward32Policy"
    },
    "policy_kwargs": {
      "features_extractor_class": {
        "py/type": "imitation.policies.base.NormalizeFeaturesExtractor"
      },
      "features_extractor_kwargs": {
        "normalize_class": {
          "py/type": "imitation.util.networks.RunningNorm"
        }
      }
    }
  },
  "policy_evaluation": {
    "n_episodes_eval": 50
  },
  "environment": {
    "gym_id": "seals/Ant-v0"
  }
}
