# Environment settings: an iteration count and the list of instance names.
# The list holds 250 names running consecutively from instance0501 to
# instance0750.
# NOTE(review): what one "iteration" means is defined by the consumer of this
# file, which is not visible here -- confirm before changing the value.
environment:
    iterations: 100
    instances:
        - instance0501
        - instance0502
        - instance0503
        - instance0504
        - instance0505
        - instance0506
        - instance0507
        - instance0508
        - instance0509
        - instance0510
        - instance0511
        - instance0512
        - instance0513
        - instance0514
        - instance0515
        - instance0516
        - instance0517
        - instance0518
        - instance0519
        - instance0520
        - instance0521
        - instance0522
        - instance0523
        - instance0524
        - instance0525
        - instance0526
        - instance0527
        - instance0528
        - instance0529
        - instance0530
        - instance0531
        - instance0532
        - instance0533
        - instance0534
        - instance0535
        - instance0536
        - instance0537
        - instance0538
        - instance0539
        - instance0540
        - instance0541
        - instance0542
        - instance0543
        - instance0544
        - instance0545
        - instance0546
        - instance0547
        - instance0548
        - instance0549
        - instance0550
        - instance0551
        - instance0552
        - instance0553
        - instance0554
        - instance0555
        - instance0556
        - instance0557
        - instance0558
        - instance0559
        - instance0560
        - instance0561
        - instance0562
        - instance0563
        - instance0564
        - instance0565
        - instance0566
        - instance0567
        - instance0568
        - instance0569
        - instance0570
        - instance0571
        - instance0572
        - instance0573
        - instance0574
        - instance0575
        - instance0576
        - instance0577
        - instance0578
        - instance0579
        - instance0580
        - instance0581
        - instance0582
        - instance0583
        - instance0584
        - instance0585
        - instance0586
        - instance0587
        - instance0588
        - instance0589
        - instance0590
        - instance0591
        - instance0592
        - instance0593
        - instance0594
        - instance0595
        - instance0596
        - instance0597
        - instance0598
        - instance0599
        - instance0600
        - instance0601
        - instance0602
        - instance0603
        - instance0604
        - instance0605
        - instance0606
        - instance0607
        - instance0608
        - instance0609
        - instance0610
        - instance0611
        - instance0612
        - instance0613
        - instance0614
        - instance0615
        - instance0616
        - instance0617
        - instance0618
        - instance0619
        - instance0620
        - instance0621
        - instance0622
        - instance0623
        - instance0624
        - instance0625
        - instance0626
        - instance0627
        - instance0628
        - instance0629
        - instance0630
        - instance0631
        - instance0632
        - instance0633
        - instance0634
        - instance0635
        - instance0636
        - instance0637
        - instance0638
        - instance0639
        - instance0640
        - instance0641
        - instance0642
        - instance0643
        - instance0644
        - instance0645
        - instance0646
        - instance0647
        - instance0648
        - instance0649
        - instance0650
        - instance0651
        - instance0652
        - instance0653
        - instance0654
        - instance0655
        - instance0656
        - instance0657
        - instance0658
        - instance0659
        - instance0660
        - instance0661
        - instance0662
        - instance0663
        - instance0664
        - instance0665
        - instance0666
        - instance0667
        - instance0668
        - instance0669
        - instance0670
        - instance0671
        - instance0672
        - instance0673
        - instance0674
        - instance0675
        - instance0676
        - instance0677
        - instance0678
        - instance0679
        - instance0680
        - instance0681
        - instance0682
        - instance0683
        - instance0684
        - instance0685
        - instance0686
        - instance0687
        - instance0688
        - instance0689
        - instance0690
        - instance0691
        - instance0692
        - instance0693
        - instance0694
        - instance0695
        - instance0696
        - instance0697
        - instance0698
        - instance0699
        - instance0700
        - instance0701
        - instance0702
        - instance0703
        - instance0704
        - instance0705
        - instance0706
        - instance0707
        - instance0708
        - instance0709
        - instance0710
        - instance0711
        - instance0712
        - instance0713
        - instance0714
        - instance0715
        - instance0716
        - instance0717
        - instance0718
        - instance0719
        - instance0720
        - instance0721
        - instance0722
        - instance0723
        - instance0724
        - instance0725
        - instance0726
        - instance0727
        - instance0728
        - instance0729
        - instance0730
        - instance0731
        - instance0732
        - instance0733
        - instance0734
        - instance0735
        - instance0736
        - instance0737
        - instance0738
        - instance0739
        - instance0740
        - instance0741
        - instance0742
        - instance0743
        - instance0744
        - instance0745
        - instance0746
        - instance0747
        - instance0748
        - instance0749
        - instance0750
# Top-level run settings.
# NOTE(review): the code that consumes this file is not visible here; the
# hedged notes below are read off the key names and should be confirmed
# against the loader.
main:
    # Same spelling as the `PPO` key under `models` in this file.
    model: PPO
    policy: ActorCriticPolicy
    n_workers: 10
    # NOTE(review): not the same setting as `models.<algo>.n_steps` (256 for
    # PPO); presumably an overall step budget for the run -- confirm.
    n_steps: 300000
    # presumably an interval expressed in the same unit as n_steps -- TODO confirm
    save_every: 20000
    # Explicit YAML null (no value configured).
    logs: null
# Hyperparameter sets, one mapping per algorithm name.
# NOTE(review): presumably `main.model` selects one of these keys and its
# entries are forwarded as keyword arguments to that algorithm; the key names
# look like Stable-Baselines constructor arguments -- confirm against the
# loader before renaming or removing any key.
models:
    PPO:
        n_steps: 256
        batch_size: 64
        n_epochs: 10
        gamma: 0.99
        gae_lambda: 0.95
        clip_range: 0.2
        ent_coef: 0.0
        learning_rate: 0.0003
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    DQN:
        gamma: 0.99
        learning_rate: 0.001
        buffer_size: 20000
        exploration_fraction: 0.1
        exploration_final_eps: 0.01
        train_freq: 1
        batch_size: 32
        learning_starts: 1000
        target_network_update_freq: 500
        prioritized_replay: false
        prioritized_replay_alpha: 0.2
        prioritized_replay_beta0: 0.4
        # Must be spelled `null`: YAML has no `None` literal, so a plain
        # `None` loads as the string "None" instead of a null value.
        prioritized_replay_beta_iters: null
        prioritized_replay_eps: 1.0e-06
        param_noise: false
        verbose: 1
        full_tensorboard_log: false
        _init_setup_model: true
    A2C:
        learning_rate: 0.0007
        n_steps: 5
        gamma: 0.99
        gae_lambda: 1.0
        ent_coef: 0.0
        vf_coef: 0.5
        max_grad_norm: 0.5
        verbose: 1
    ACER:
        gamma: 0.99
        n_steps: 20
        num_procs: 1
        q_coef: 0.5
        ent_coef: 0.01
        max_grad_norm: 10
        learning_rate: 0.0007
        lr_schedule: linear
        rprop_alpha: 0.99
        rprop_epsilon: 0.0001
        buffer_size: 5000
        replay_ratio: 4
        replay_start: 1000
        correction_term: 10.0
        trust_region: true
        alpha: 0.99
        delta: 1
        verbose: 0
    ACKTR:
        gamma: 0.99
        nprocs: 1
        n_steps: 20
        ent_coef: 0.01
        vf_coef: 0.25
        vf_fisher_coef: 1.0
        learning_rate: 0.25
        max_grad_norm: 0.5
        kfac_clip: 0.001
        lr_schedule: linear
        verbose: 0
        async_eigen_decomp: false
        full_tensorboard_log: false
