{
    "seed": 1,
    "env_type": "gridworld",
    "env_id": "custom2",
    "env": "tools.environments.create('gridworld', 'custom2', normalize_states=False)",
    "flow_epochs": 20,
    "minibatch_size": 64,
    "discount_factor": 1.0,
    "learning_rate": 5e-4,
    "learning_rate_feasibility": 2.5e-5,
    "replay_buffer_size": 10000,
    "hidden_size": 64,
    "subepochs": 25,
    "debug": true,
    "episodes_per_epoch": 20,
    "ppo_clip_param": 0.1,
    "forward_crl": "CPPO",
    "policy_class": "tools.algorithms.PPOPolicy",
    "ppo_entropy_coef": 0.01,
    "ppo_epochs": 500,
    "ppo_epochs_novelty": 250,
    "ppo_outer_epochs_novelty": 5,
    "normalize_flow_inputs": true,
    "window": 25,
    "beta": 0.99,
    "past_pi_weights": [],
    "past_pi_dissimilarities": [],
    "expert_episodes": 50,
    "pi_episodes": 50,
    "updates_per_epoch": 20,
    "alpha": 15,
    "outer_epochs": 10,
    "normalize_func": "max",
    "cost_condition": "lambda s, a: s[0] in [2,3,4] and s[1] in [2,3,4]",
    "cost_comparison": "lambda mc, c: __import__('tools').utils.mse(mc, c)",
    "accrual_comparison": "lambda ea, a: __import__('tools').utils.wasserstein_distance2d(ea, a)",
    "input_format": "lambda s, a: [*s]",
    "vector_input_format": "lambda S, A: S",
    "state_reduction": "lambda s: s",
    "vector_state_reduction": "lambda S: S",
    "action_reduction": "lambda a: a",
    "vector_action_reduction": "lambda A: A",
    "i": 2,
    "mix_save_epoch": 250
}
