# Environment settings: an iteration count and the list of problem
# instances this configuration refers to.
environment:
    # NOTE(review): what one "iteration" means (per instance, per episode,
    # per solver call, ...) is not visible in this file; confirm against
    # the code that reads it.
    iterations: 100
    # 250 consecutive instance identifiers, instance0251 through
    # instance0500, with no gaps.
    # NOTE(review): presumably a slice of a larger instance set (the ids
    # start at 0251); confirm how the remaining ids are used.
    instances:
    - instance0251
    - instance0252
    - instance0253
    - instance0254
    - instance0255
    - instance0256
    - instance0257
    - instance0258
    - instance0259
    - instance0260
    - instance0261
    - instance0262
    - instance0263
    - instance0264
    - instance0265
    - instance0266
    - instance0267
    - instance0268
    - instance0269
    - instance0270
    - instance0271
    - instance0272
    - instance0273
    - instance0274
    - instance0275
    - instance0276
    - instance0277
    - instance0278
    - instance0279
    - instance0280
    - instance0281
    - instance0282
    - instance0283
    - instance0284
    - instance0285
    - instance0286
    - instance0287
    - instance0288
    - instance0289
    - instance0290
    - instance0291
    - instance0292
    - instance0293
    - instance0294
    - instance0295
    - instance0296
    - instance0297
    - instance0298
    - instance0299
    - instance0300
    - instance0301
    - instance0302
    - instance0303
    - instance0304
    - instance0305
    - instance0306
    - instance0307
    - instance0308
    - instance0309
    - instance0310
    - instance0311
    - instance0312
    - instance0313
    - instance0314
    - instance0315
    - instance0316
    - instance0317
    - instance0318
    - instance0319
    - instance0320
    - instance0321
    - instance0322
    - instance0323
    - instance0324
    - instance0325
    - instance0326
    - instance0327
    - instance0328
    - instance0329
    - instance0330
    - instance0331
    - instance0332
    - instance0333
    - instance0334
    - instance0335
    - instance0336
    - instance0337
    - instance0338
    - instance0339
    - instance0340
    - instance0341
    - instance0342
    - instance0343
    - instance0344
    - instance0345
    - instance0346
    - instance0347
    - instance0348
    - instance0349
    - instance0350
    - instance0351
    - instance0352
    - instance0353
    - instance0354
    - instance0355
    - instance0356
    - instance0357
    - instance0358
    - instance0359
    - instance0360
    - instance0361
    - instance0362
    - instance0363
    - instance0364
    - instance0365
    - instance0366
    - instance0367
    - instance0368
    - instance0369
    - instance0370
    - instance0371
    - instance0372
    - instance0373
    - instance0374
    - instance0375
    - instance0376
    - instance0377
    - instance0378
    - instance0379
    - instance0380
    - instance0381
    - instance0382
    - instance0383
    - instance0384
    - instance0385
    - instance0386
    - instance0387
    - instance0388
    - instance0389
    - instance0390
    - instance0391
    - instance0392
    - instance0393
    - instance0394
    - instance0395
    - instance0396
    - instance0397
    - instance0398
    - instance0399
    - instance0400
    - instance0401
    - instance0402
    - instance0403
    - instance0404
    - instance0405
    - instance0406
    - instance0407
    - instance0408
    - instance0409
    - instance0410
    - instance0411
    - instance0412
    - instance0413
    - instance0414
    - instance0415
    - instance0416
    - instance0417
    - instance0418
    - instance0419
    - instance0420
    - instance0421
    - instance0422
    - instance0423
    - instance0424
    - instance0425
    - instance0426
    - instance0427
    - instance0428
    - instance0429
    - instance0430
    - instance0431
    - instance0432
    - instance0433
    - instance0434
    - instance0435
    - instance0436
    - instance0437
    - instance0438
    - instance0439
    - instance0440
    - instance0441
    - instance0442
    - instance0443
    - instance0444
    - instance0445
    - instance0446
    - instance0447
    - instance0448
    - instance0449
    - instance0450
    - instance0451
    - instance0452
    - instance0453
    - instance0454
    - instance0455
    - instance0456
    - instance0457
    - instance0458
    - instance0459
    - instance0460
    - instance0461
    - instance0462
    - instance0463
    - instance0464
    - instance0465
    - instance0466
    - instance0467
    - instance0468
    - instance0469
    - instance0470
    - instance0471
    - instance0472
    - instance0473
    - instance0474
    - instance0475
    - instance0476
    - instance0477
    - instance0478
    - instance0479
    - instance0480
    - instance0481
    - instance0482
    - instance0483
    - instance0484
    - instance0485
    - instance0486
    - instance0487
    - instance0488
    - instance0489
    - instance0490
    - instance0491
    - instance0492
    - instance0493
    - instance0494
    - instance0495
    - instance0496
    - instance0497
    - instance0498
    - instance0499
    - instance0500
# Top-level run settings: which algorithm and policy to use, plus
# run-wide counters.
main:
    # Must match one of the keys under `models` (PPO, DQN, A2C, ACER, ACKTR).
    model: PPO
    # NOTE(review): presumably the policy class name handed to the
    # algorithm; confirm how the loader resolves this string.
    policy: ActorCriticPolicy
    # NOTE(review): presumably the number of parallel workers/environments;
    # confirm against the consumer.
    n_workers: 10
    # NOTE(review): this is a different setting from `models.<name>.n_steps`
    # (e.g. 256 for PPO); it looks like the total training step budget.
    # Confirm against the consumer.
    n_steps: 300000
    # NOTE(review): presumably a checkpoint interval in the same unit as
    # `n_steps` above; confirm.
    save_every: 20000
    # Explicit YAML null: no log location is configured here.
    logs: null
# Per-algorithm hyperparameter sets, keyed by algorithm name. `main.model`
# names the entry that is active for a run.
# NOTE(review): each mapping is presumably forwarded as keyword arguments to
# the matching algorithm constructor; confirm against the loader before
# renaming or removing any key.
models:
    PPO:
        n_steps: 256
        batch_size: 64
        n_epochs: 10
        gamma: 0.99
        gae_lambda: 0.95
        clip_range: 0.2
        ent_coef: 0.0
        learning_rate: 0.0003
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    DQN:
        gamma: 0.99
        learning_rate: 0.001
        buffer_size: 20000
        exploration_fraction: 0.1
        exploration_final_eps: 0.01
        train_freq: 1
        batch_size: 32
        learning_starts: 1000
        target_network_update_freq: 500
        prioritized_replay: false
        prioritized_replay_alpha: 0.2
        prioritized_replay_beta0: 0.4
        # Must be the YAML null literal. A plain `None` is loaded as the
        # string "None", which is not equal to a real null/None in the
        # consumer and would be treated as a provided value.
        prioritized_replay_beta_iters: null
        prioritized_replay_eps: 1.0e-06
        param_noise: false
        verbose: 1
        full_tensorboard_log: false
        _init_setup_model: true
    A2C:
        learning_rate: 0.0007
        n_steps: 5
        gamma: 0.99
        gae_lambda: 1.0
        ent_coef: 0.0
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    ACER:
        gamma: 0.99
        n_steps: 20
        num_procs: 1
        q_coef: 0.5
        ent_coef: 0.01
        max_grad_norm: 10
        learning_rate: 0.0007
        lr_schedule: linear
        rprop_alpha: 0.99
        rprop_epsilon: 0.0001
        buffer_size: 5000
        replay_ratio: 4
        replay_start: 1000
        correction_term: 10.0
        trust_region: true
        alpha: 0.99
        delta: 1
        verbose: 0
    ACKTR:
        gamma: 0.99
        nprocs: 1
        n_steps: 20
        ent_coef: 0.01
        vf_coef: 0.25
        vf_fisher_coef: 1.0
        learning_rate: 0.25
        max_grad_norm: 0.5
        kfac_clip: 0.001
        lr_schedule: linear
        verbose: 0
        async_eigen_decomp: false
        full_tensorboard_log: false
