{
    "seed": 1,
    "env_type": "gym",
    "env_id": "CartPole-Middle",
    "env": "tools.environments.create('gym', 'CartPole-Middle', normalize_states=False)",
    "flow_epochs": 20,
    "minibatch_size": 64,
    "discount_factor": 0.99,
    "learning_rate": 5e-4,
    "learning_rate_feasibility": 2.5e-5,
    "replay_buffer_size": 10000,
    "hidden_size": 64,
    "subepochs": 25,
    "debug": true,
    "episodes_per_epoch": 20,
    "ppo_clip_param": 0.1,
    "forward_crl": "CPPO",
    "policy_class": "tools.algorithms.PPOPolicy",
    "ppo_entropy_coef": 0.01,
    "ppo_epochs": 300,
    "ppo_epochs_novelty": 150,
    "ppo_outer_epochs_novelty": 5,
    "normalize_flow_inputs": true,
    "window": 25,
    "beta": 30,
    "past_pi_weights": [],
    "past_pi_dissimilarities": [],
    "expert_episodes": 50,
    "pi_episodes": 50,
    "updates_per_epoch": 20,
    "alpha": 15,
    "outer_epochs": 10,
    "normalize_func": "max",
    "cost_condition": "lambda s, a: (s[0] > 1. and a == 1) or (s[0] < -1. and a == 0)",
    "cost_comparison": "lambda mc, c: __import__('tools').utils.mse(mc, c)",
    "accrual_comparison": "lambda ea, a: __import__('tools').utils.wasserstein_distance2d(ea[0, :].reshape(1, -1), a[0, :].reshape(1, -1)) + __import__('tools').utils.wasserstein_distance2d(ea[1, :].reshape(1, -1), a[1, :].reshape(1, -1))",
    "input_format": "lambda s, a: [*s, a]",
    "vector_input_format": "lambda S, A: torch.cat((S, A), dim=-1)",
    "state_reduction": "lambda s: s[:1]",
    "vector_state_reduction": "lambda S: S[:, :, :1]",
    "action_reduction": "lambda a: a",
    "vector_action_reduction": "lambda A: A",
    "i": 2,
    "mix_save_epoch": 150
}
