# Extra hyperparameters, anchored so that other mappings in this file can
# pull them in through a `<<` merge key.
extra_hyperparameters: &extra_hyperparameters
    use_target_to_gather_data: false
    weights_decay_lambda: 1.0
    lr_account_for_nbr_actor: false

# Network architecture preset, anchored as `LargeCNN` for reuse through
# `<<` merge keys elsewhere in this file.
LargeCNN: &LargeCNN
        # Architecture selectors. 'None' is a quoted string, not a YAML null.
        phi_arch: 'CNN'
        actor_arch: 'None'
        critic_arch: 'None'

        # Phi body: one entry per convolutional layer in each list.
        phi_arch_channels: [32, 64, 64]
        phi_arch_kernels: [8, 4, 3]
        phi_arch_strides: [4, 2, 1]
        phi_arch_paddings: [1, 1, 1]
        phi_arch_hidden_units: [512]
        phi_arch_feature_dim: 512

        # Actor and critic heads: no extra hidden layers configured.
        actor_arch_hidden_units: []
        critic_arch_hidden_units: []

        
# Base DQN agent configuration. Every entry under `agents` merges this
# mapping and overrides individual keys.
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        # A single merge key carrying both anchors. Writing `<<` twice in one
        # mapping is a duplicate key, which strict YAML loaders reject. The
        # two merged mappings share no keys, so the result is unchanged.
        <<: [*LargeCNN, *extra_hyperparameters]

        # Algorithm variants (all off by default) and n-step return length.
        double: false
        dueling: false
        noisy: false
        n_step: 1

        # Prioritized experience replay settings.
        use_PER: false
        PER_alpha: 0.6
        PER_beta: 1.0

        # Replay buffer settings.
        # NOTE(review): YAML 1.1 loaders such as PyYAML read exponent literals
        # without a dot and signed exponent (1e6, 1e4) as strings, while
        # YAML 1.2 loaders read them as floats -- confirm the consumer casts
        # these values before using them.
        replay_capacity: 1e6
        min_capacity: 1e4
        replay_period: 1

        # Optimisation settings.
        observation_resize_dim: 84
        discount: 0.99
        use_cuda: true
        gradient_clip: 0.5
        batch_size: 32
        tau: 1.0e-2
        learning_rate: 2.5e-4
        adam_eps: 1.0e-8

        # Epsilon-greedy schedule (commented-out alternative values on the right).
        epsstart: 1.0
        epsend: 0.01    # 0.1
        epsdecay: 30000  # 1000000


# Experiment definition: the list of tasks to run plus the benchmarking and
# training-budget settings. Commented-out task entries are kept for reference.
experiment:
    tasks: [
    #          {'env-id': 'QbertNoFrameskip-v4',

    #          'run-id': 'Seed13_venv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
    #          'agent-id': 'ppo_LargeCNN',

    #          'nbr_actor': 8,
    #          'nbr_frame_skipping': 4,
    #          'nbr_frame_stacking': 4,
    #          'grayscale': True,
    #          'single_life_episode': True,
    #          'nbr_max_random_steps': 30,
    #          'clip_reward': True,
    #          'observation_resize_dim': 84
    #          },

            {'env-id': 'PongNoFrameskip-v4',

             'run-id': 'Seed1_venv1_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30_SingleLife_ClipReward_Eps3p4End1m2_EntropyReg0_b16_RepP2_DEBUGNSTEP+ProperGammaLoss+CurrentActionLoss',
             #'agent-id': '1step_double_dqn_LargeCNN_r1e5_tau1m4',
             #'agent-id': '1step_double_dqn_LargeCNN_r1e5_tau1m3',
             #'agent-id': '1step_dueling_double_dqn_LargeCNN_r1e5_tau1m3',
             #'agent-id': '1step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3',
             #'agent-id': '1step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP1_b32',
             #'agent-id': '3step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16',
             #'agent-id': '20step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16',

             #'agent-id': '1step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16',
             # Must match a key under `agents`.
             'agent-id': '1step_double_dqn_LargeCNN_r2e4_beta4m1_tau1m3_RepP2_b16',

             #'agent-id': '5step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16',
             #'agent-id': '20step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16',
             #'agent-id': '1step_prioritized_double_dqn_LargeCNN_r1e4_beta6m1_tau1m4',

             'nbr_actor': 1,
             'nbr_frame_skipping': 4,
             'nbr_frame_stacking': 4,
             'grayscale': True,
             'single_life_episode': True,
             'nbr_max_random_steps': 30,
             'clip_reward': True,
             # YAML has no tuple syntax: `(84,84)` inside a flow mapping is
             # split at the comma into the string value '(84' and a stray
             # key '84)'. Use the plain integer, as the agent base config
             # and the commented-out tasks in this list do.
             # NOTE(review): confirm the consumer expects a single int here.
             'observation_resize_dim': 84,
             },

            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_double_dqn_LargeCNN_r1e5_tau1m4',

            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_dueling_double_dqn_LargeCNN_r1e5_tau1m4',

            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_fastprioritized_double_dqn_LargeCNN_r1e5_tau1m4',

            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'BreakoutNoFrameskip-v4',

            #  'run-id': 'Seed13_penv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': 'ppo_LargeCNN',

            #  'nbr_actor': 8,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },
            ]
    experiment_id: 'r2d2_test/dqn'
    benchmarking_episodes: 1
    # NOTE(review): YAML 1.1 loaders such as PyYAML read exponent literals
    # with an unsigned exponent (1.0e4, 1.0e8, 3.0e5) as strings -- confirm
    # the consumer casts these values before using them.
    benchmarking_interval: 1.0e4
    benchmarking_record_episode_interval: 1.0e8
    train_observation_budget: 3.0e5 #1.0e7
    seed: 1

agents:
    # Double DQN with 1-step returns and replay_capacity 1e5. Dueling and
    # noisy are not overridden, so they keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        double: true
        replay_capacity: 1e5

    # Same overrides as the plain 1-step double DQN (replay_capacity 1e5),
    # plus use_target_to_gather_data switched on. Dueling and noisy keep the
    # base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e5_targetAsMain:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        use_target_to_gather_data: true
        n_step: 1
        double: true
        replay_capacity: 1e5

    # 1-step double DQN with replay_capacity 1e4 and tau 1.0e-4. Dueling and
    # noisy keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e4_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        replay_capacity: 1e4
    
    # The replay_capacity hyperparameter strongly affects learning stability:
    # 1-step double DQN with replay_capacity 1e5 and tau 1.0e-4. Dueling and
    # noisy keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e5_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        replay_capacity: 1e5

    # The tau hyperparameter strongly affects data-efficiency:
    # 1-step double DQN with replay_capacity 1e5 and tau 1.0e-3. Dueling and
    # noisy keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e5_tau1m3:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        double: true
        replay_capacity: 1e5

    # 1-step dueling double DQN with replay_capacity 1e5 and tau 1.0e-3.
    # Noisy keeps the base-config value (disabled).
    1step_dueling_double_dqn_LargeCNN_r1e5_tau1m3:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        dueling: true
        double: true
        replay_capacity: 1e5

    # 1-step dueling double DQN with replay_capacity 1e5; tau is inherited
    # from the base config. Noisy keeps the base-config value (disabled).
    1step_dueling_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        dueling: true
        double: true
        replay_capacity: 1e5

    # 1-step dueling double DQN with replay_capacity 1e5 and tau 1.0e-4.
    # Noisy keeps the base-config value (disabled).
    1step_dueling_double_dqn_LargeCNN_r1e5_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        dueling: true
        double: true
        replay_capacity: 1e5

    # 1-step double DQN with prioritized replay (PER_beta 0.4),
    # replay_capacity 1e5 and tau 1.0e-3. Dueling and noisy keep the
    # base-config values (disabled).
    1step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # The amount of data gathered is the actual limitation, not the number of updates or the period...
    # The following performs similarly to RepP2_b16 on Pong.
    # 1-step double DQN with prioritized replay (PER_beta 0.4),
    # replay_capacity 1e5 and tau 1.0e-3, updating every step with
    # batch_size 32. Dueling and noisy keep the base-config values (disabled).
    1step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP1_b32:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8; this entry
        # uses 32 / 1. Author's timings on an NVIDIA 1080 Ti, quoted for
        # batch_size=16: ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        # NOTE(review): the timing note presumably was carried over from the
        # batch_size=16 / replay_period=2 variants -- confirm it applies here.
        batch_size: 32
        replay_period: 1

    # 3-step double DQN, replay_capacity 1e5, tau 1.0e-3, prioritized replay
    # switched off. Dueling and noisy keep the base-config values (disabled).
    3step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 3
        double: true
        # PER_beta is still set although use_PER is off
        # (presumably unused in that case -- confirm against the consumer).
        use_PER: false
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 20-step double DQN, replay_capacity 1e5, tau 1.0e-3, prioritized replay
    # switched off. Dueling and noisy keep the base-config values (disabled).
    20step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 20
        double: true
        # PER_beta is still set although use_PER is off
        # (presumably unused in that case -- confirm against the consumer).
        use_PER: false
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 1-step double DQN, replay_capacity 1e5, tau 1.0e-3, prioritized replay
    # switched off. Dueling and noisy keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        double: true
        # PER_beta is still set although use_PER is off
        # (presumably unused in that case -- confirm against the consumer).
        use_PER: false
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 1-step double DQN, replay_capacity 2e4, tau 1.0e-3, prioritized replay
    # switched off. This key is the 'agent-id' selected by the active Pong
    # task. Dueling and noisy keep the base-config values (disabled).
    1step_double_dqn_LargeCNN_r2e4_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 1
        double: true
        # PER_beta is still set although use_PER is off
        # (presumably unused in that case -- confirm against the consumer).
        use_PER: false
        PER_beta: 0.4
        replay_capacity: 2e4
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 5-step double DQN, replay_capacity 1e5, tau 1.0e-3, prioritized replay
    # switched off. Dueling and noisy keep the base-config values (disabled).
    5step_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 5
        double: true
        # PER_beta is still set although use_PER is off
        # (presumably unused in that case -- confirm against the consumer).
        use_PER: false
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 20-step double DQN with prioritized replay (PER_beta 0.4),
    # replay_capacity 1e5 and tau 1.0e-3. Dueling and noisy keep the
    # base-config values (disabled).
    20step_prioritized_double_dqn_LargeCNN_r1e5_beta4m1_tau1m3_RepP2_b16:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 20
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 1-step double DQN with prioritized replay (PER_beta 0.6),
    # replay_capacity 1e4 and tau 1.0e-4. noisy and use_target_to_gather_data
    # are restated explicitly as off; dueling keeps the base-config value
    # (disabled).
    1step_prioritized_double_dqn_LargeCNN_r1e4_beta6m1_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        noisy: false
        use_target_to_gather_data: false
        use_PER: true
        PER_beta: 0.6
        replay_capacity: 1e4
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 1-step noisy double DQN with prioritized replay (PER_beta 0.6),
    # replay_capacity 1e4 and tau 1.0e-4. use_target_to_gather_data is
    # restated explicitly as off; dueling keeps the base-config value
    # (disabled).
    1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e4_beta6m1_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        noisy: true
        use_target_to_gather_data: false
        use_PER: true
        PER_beta: 0.6
        replay_capacity: 1e4
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2
    
    # 1-step noisy double DQN with replay_capacity 1e5. Dueling keeps the
    # base-config value (disabled).
    1step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        noisy: true
        double: true
        replay_capacity: 1e5

    # 1-step double DQN with both dueling and noisy enabled and
    # replay_capacity 1e5.
    1step_noisy_double_dueling_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        noisy: true
        dueling: true
        double: true
        replay_capacity: 1e5

    # 3-step noisy double DQN with replay_capacity 1e5. Dueling keeps the
    # base-config value (disabled).
    3step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 3
        noisy: true
        double: true
        replay_capacity: 1e5

    # 5-step noisy double DQN with replay_capacity 1e5. Dueling keeps the
    # base-config value (disabled).
    5step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 5
        noisy: true
        double: true
        replay_capacity: 1e5

    # 1-step double DQN with prioritized replay (PER_beta 0.5) and
    # replay_capacity 1e5; noisy and dueling keep the base-config values
    # (disabled). This entry used to share its key with the noisy entry
    # directly below. Duplicate mapping keys are invalid YAML: strict loaders
    # reject the file and last-wins loaders silently drop this definition.
    # It now has its own key so both entries load.
    # NOTE(review): the new name is derived from the values set here
    # (not noisy, PER_beta 0.5) -- confirm it matches the intended experiment.
    1step_fastprioritized_double_dqn_LargeCNN_r1e5_B5m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        replay_capacity: 1e5
        use_PER: True
        PER_beta: 0.5
        replay_period: 2
        batch_size: 16
        # Paper: ratio = batch_size(=32) / replay_period(=4) = 8 ,
        # but bottleneck on GPU batchsize gives a better trade-off
        # batch-regularization-effect / speed with a batch_size=16
        # using NVIDIA 1080 Ti... Expect ~90 it/sec, without update
        # and ~84 it/sec with updates...
        # Whereas 32 / 4 yielded ~25 it/sec....
        double: True
        #dueling: True
        #noisy: True
        n_step: 1

    # 1-step noisy double DQN with prioritized replay (PER_beta 0.4) and
    # replay_capacity 1e5; dueling keeps the base-config value (disabled).
    1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        replay_capacity: 1e5
        use_PER: True
        PER_beta: 0.4
        replay_period: 2
        batch_size: 16
        # Paper: ratio = batch_size(=32) / replay_period(=4) = 8 ,
        # but bottleneck on GPU batchsize gives a better trade-off
        # batch-regularization-effect / speed with a batch_size=16
        # using NVIDIA 1080 Ti... Expect ~90 it/sec, without update
        # and ~84 it/sec with updates...
        # Whereas 32 / 4 yielded ~25 it/sec....
        double: True
        #dueling: True
        noisy: True
        n_step: 1

    # 3-step noisy double DQN with prioritized replay (PER_beta 0.4) and
    # replay_capacity 1e5; tau is inherited from the base config. Dueling
    # keeps the base-config value (disabled).
    3step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 3
        noisy: true
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 5-step noisy double DQN with prioritized replay (PER_beta 0.4) and
    # replay_capacity 1e5; tau is inherited from the base config. Dueling
    # keeps the base-config value (disabled).
    5step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 5
        noisy: true
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # 5-step double DQN with dueling, noisy and prioritized replay
    # (PER_beta 0.4) all enabled, replay_capacity 1e5 and tau 1.0e-3.
    5step_noisy_fastprioritized_double_dueling_dqn_LargeCNN_r1e5_B4m1_tau1m3:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 5
        noisy: true
        dueling: true
        double: true
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5
        tau: 1.0e-3
        # Paper setting: batch_size(=32) / replay_period(=4) = 8. Using 16 / 2
        # keeps that ratio; per the author's timings on an NVIDIA 1080 Ti the
        # GPU batch size is the bottleneck: ~90 it/sec without updates and
        # ~84 it/sec with updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

        
        