# Environment settings: an iteration count and the list of instance ids.
# NOTE(review): what `iterations` counts is not evident from this file —
# confirm against the code that loads it.
environment:
    iterations: 100
    # 250 consecutive ids, instance0001 through instance0250.
    instances:
        - instance0001
        - instance0002
        - instance0003
        - instance0004
        - instance0005
        - instance0006
        - instance0007
        - instance0008
        - instance0009
        - instance0010
        - instance0011
        - instance0012
        - instance0013
        - instance0014
        - instance0015
        - instance0016
        - instance0017
        - instance0018
        - instance0019
        - instance0020
        - instance0021
        - instance0022
        - instance0023
        - instance0024
        - instance0025
        - instance0026
        - instance0027
        - instance0028
        - instance0029
        - instance0030
        - instance0031
        - instance0032
        - instance0033
        - instance0034
        - instance0035
        - instance0036
        - instance0037
        - instance0038
        - instance0039
        - instance0040
        - instance0041
        - instance0042
        - instance0043
        - instance0044
        - instance0045
        - instance0046
        - instance0047
        - instance0048
        - instance0049
        - instance0050
        - instance0051
        - instance0052
        - instance0053
        - instance0054
        - instance0055
        - instance0056
        - instance0057
        - instance0058
        - instance0059
        - instance0060
        - instance0061
        - instance0062
        - instance0063
        - instance0064
        - instance0065
        - instance0066
        - instance0067
        - instance0068
        - instance0069
        - instance0070
        - instance0071
        - instance0072
        - instance0073
        - instance0074
        - instance0075
        - instance0076
        - instance0077
        - instance0078
        - instance0079
        - instance0080
        - instance0081
        - instance0082
        - instance0083
        - instance0084
        - instance0085
        - instance0086
        - instance0087
        - instance0088
        - instance0089
        - instance0090
        - instance0091
        - instance0092
        - instance0093
        - instance0094
        - instance0095
        - instance0096
        - instance0097
        - instance0098
        - instance0099
        - instance0100
        - instance0101
        - instance0102
        - instance0103
        - instance0104
        - instance0105
        - instance0106
        - instance0107
        - instance0108
        - instance0109
        - instance0110
        - instance0111
        - instance0112
        - instance0113
        - instance0114
        - instance0115
        - instance0116
        - instance0117
        - instance0118
        - instance0119
        - instance0120
        - instance0121
        - instance0122
        - instance0123
        - instance0124
        - instance0125
        - instance0126
        - instance0127
        - instance0128
        - instance0129
        - instance0130
        - instance0131
        - instance0132
        - instance0133
        - instance0134
        - instance0135
        - instance0136
        - instance0137
        - instance0138
        - instance0139
        - instance0140
        - instance0141
        - instance0142
        - instance0143
        - instance0144
        - instance0145
        - instance0146
        - instance0147
        - instance0148
        - instance0149
        - instance0150
        - instance0151
        - instance0152
        - instance0153
        - instance0154
        - instance0155
        - instance0156
        - instance0157
        - instance0158
        - instance0159
        - instance0160
        - instance0161
        - instance0162
        - instance0163
        - instance0164
        - instance0165
        - instance0166
        - instance0167
        - instance0168
        - instance0169
        - instance0170
        - instance0171
        - instance0172
        - instance0173
        - instance0174
        - instance0175
        - instance0176
        - instance0177
        - instance0178
        - instance0179
        - instance0180
        - instance0181
        - instance0182
        - instance0183
        - instance0184
        - instance0185
        - instance0186
        - instance0187
        - instance0188
        - instance0189
        - instance0190
        - instance0191
        - instance0192
        - instance0193
        - instance0194
        - instance0195
        - instance0196
        - instance0197
        - instance0198
        - instance0199
        - instance0200
        - instance0201
        - instance0202
        - instance0203
        - instance0204
        - instance0205
        - instance0206
        - instance0207
        - instance0208
        - instance0209
        - instance0210
        - instance0211
        - instance0212
        - instance0213
        - instance0214
        - instance0215
        - instance0216
        - instance0217
        - instance0218
        - instance0219
        - instance0220
        - instance0221
        - instance0222
        - instance0223
        - instance0224
        - instance0225
        - instance0226
        - instance0227
        - instance0228
        - instance0229
        - instance0230
        - instance0231
        - instance0232
        - instance0233
        - instance0234
        - instance0235
        - instance0236
        - instance0237
        - instance0238
        - instance0239
        - instance0240
        - instance0241
        - instance0242
        - instance0243
        - instance0244
        - instance0245
        - instance0246
        - instance0247
        - instance0248
        - instance0249
        - instance0250
# Top-level settings for a run.
main:
    # Algorithm name; matches one of the keys under `models`.
    model: PPO
    policy: ActorCriticPolicy
    n_workers: 10
    # NOTE(review): `models.PPO` has its own `n_steps` (256) with a very
    # different magnitude — presumably this one is the overall step budget
    # for the run; confirm against the loader.
    n_steps: 300000
    # NOTE(review): presumably a checkpoint interval in the same unit as
    # `n_steps` above — confirm.
    save_every: 20000
    # Explicit YAML null: no value is configured here.
    logs: null
# Hyperparameter sets, one mapping per algorithm name.
# `main.model` names one of these keys; presumably only that entry is used
# for a given run and its values are forwarded to the algorithm as keyword
# arguments — confirm against the code that loads this file.
models:
    PPO:
        n_steps: 256
        batch_size: 64
        n_epochs: 10
        gamma: 0.99
        gae_lambda: 0.95
        clip_range: 0.2
        ent_coef: 0.0
        learning_rate: 0.0003
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    DQN:
        gamma: 0.99
        learning_rate: 0.001
        buffer_size: 20000
        exploration_fraction: 0.1
        exploration_final_eps: 0.01
        train_freq: 1
        batch_size: 32
        learning_starts: 1000
        target_network_update_freq: 500
        prioritized_replay: false
        prioritized_replay_alpha: 0.2
        prioritized_replay_beta0: 0.4
        # Must be `null`: the plain scalar `None` is not a YAML null and
        # would be loaded as the string "None".
        prioritized_replay_beta_iters: null
        prioritized_replay_eps: 1.0e-06
        param_noise: false
        verbose: 1
        full_tensorboard_log: false
        _init_setup_model: true
    A2C:
        learning_rate: 0.0007
        n_steps: 5
        gamma: 0.99
        gae_lambda: 1.0
        ent_coef: 0.0
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    ACER:
        gamma: 0.99
        n_steps: 20
        num_procs: 1
        q_coef: 0.5
        ent_coef: 0.01
        max_grad_norm: 10
        learning_rate: 0.0007
        lr_schedule: linear
        rprop_alpha: 0.99
        rprop_epsilon: 0.0001
        buffer_size: 5000
        replay_ratio: 4
        replay_start: 1000
        correction_term: 10.0
        trust_region: true
        alpha: 0.99
        delta: 1
        verbose: 0
    ACKTR:
        gamma: 0.99
        nprocs: 1
        n_steps: 20
        ent_coef: 0.01
        vf_coef: 0.25
        vf_fisher_coef: 1.0
        learning_rate: 0.25
        max_grad_norm: 0.5
        kfac_clip: 0.001
        lr_schedule: linear
        verbose: 0
        async_eigen_decomp: false
        full_tensorboard_log: false
