{
    "env": {
        "config_file": "env_config_for_ppo_easy_medium_punishment.json",
        "custom_config": {}
    },
    "bc": {
        "experiment_name": "10hz_128_128_300epochs_easy_4",
        "policy_file_save_name": "bc_checkpoint",
        "policy_after_value_head_trained_file_save_name": "bc_checkpoint_after_value_head_trained",
        "data_cache_dir": "cache/10hz_10_5_5_iter_1_aug",
        "seed": 33,
        "train_epochs": 300,
        "batch_size": 4096,
        "prob_true_act_threshold": 10000.0,
        "loss_threshold": -8.0
    },
    "rl_bc": {
        "experiment_name": "10hz_128_128_2e8steps_easy_medium_punishment_4",
        "seed": 36,
        "seed_for_load_algo": 37,
        "net_arch": [128, 128],
        "batch_size": 4096,
        "gamma": 0.995,
        "activate_value_head_train_steps": 2e7,
        "train_steps": 2e8,
        "rollout_process_num": 64,
        "evaluate_process_num": 32,
        "callback_process_num": 32,
        "evaluate_on_all_tasks": false,
        "evaluate_frequence": 2048,
        "evaluate_nums_in_evaluation": 30,
        "evaluate_nums_in_callback": 3,
        "kl_with_bc_model_coef": 1e-3
    },
    "rl": {
        "experiment_name": "10hz_128_128_2e8steps_easy_medium_punishment_4_singleRL",
        "seed": 39,
        "rollout_process_num": 64,
        "train_steps": 2e8
    }
}