# Additional agent options, exposed through the `extra_hyperparameters`
# anchor so that the base DQN configuration can merge them in.
extra_hyperparameters: &extra_hyperparameters
    use_target_to_gather_data: false
    weights_decay_lambda: 1.0
    lr_account_for_nbr_actor: false

# Network architecture preset, exposed through the `LargeCNN` anchor.
# Only the phi body has an architecture ('CNN'); the actor and critic
# architectures are the literal string 'None' with no hidden units.
LargeCNN: &LargeCNN
    # Phi body. The four per-layer lists have three entries each
    # (presumably one per convolution layer -- confirm with the consumer).
    phi_arch: 'CNN'
    phi_arch_channels: [32, 64, 64]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_hidden_units: [512]
    phi_arch_feature_dim: 512

    # Actor head.
    actor_arch: 'None'
    actor_arch_hidden_units: []

    # Critic head.
    critic_arch: 'None'
    critic_arch_hidden_units: []

        
# Base DQN configuration. Every entry under `agents:` merges this anchor and
# then overrides individual keys. The suffixes of the anchor name appear to
# encode the defaults set below: obs84 (observation_resize_dim), graclip5m1
# (gradient_clip 0.5), b32 (batch_size), tau1m2 (tau 1.0e-2) and lr25m5
# (learning_rate 2.5e-4).
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
    # Both anchors are pulled in through ONE merge key. A mapping may not
    # repeat a key, and writing `<<` twice is a duplicate key that strict
    # loaders reject. The two sources share no keys, so their order in the
    # list does not affect the merged result.
    <<: [*LargeCNN, *extra_hyperparameters]

    # Algorithm variants, all disabled by default.
    double: false
    dueling: false
    noisy: false
    n_step: 1

    # Prioritized experience replay.
    use_PER: false
    PER_alpha: 0.6
    PER_beta: 1.0

    # Replay buffer.
    # NOTE(review): `1e6` / `1e4` have no decimal point and no sign in the
    # exponent; YAML 1.1 loaders such as PyYAML read them as strings while
    # YAML 1.2 loaders read floats. Values are left untouched -- confirm the
    # consumer converts them explicitly.
    replay_capacity: 1e6
    min_capacity: 1e4
    replay_period: 1

    # Optimisation and runtime.
    observation_resize_dim: 84
    discount: 0.99
    use_cuda: true
    gradient_clip: 0.5
    batch_size: 32
    tau: 1.0e-2
    learning_rate: 2.5e-4
    adam_eps: 1.0e-8

    # Epsilon schedule (the trailing comments keep the values that were
    # noted next to the originals).
    epsstart: 1.0
    epsend: 0.01     # 0.1
    epsdecay: 30000  # 1000000


experiment:
    tasks: [
    #          {'env-id': 'QbertNoFrameskip-v4',
             
    #          'run-id': 'Seed13_venv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
    #          'agent-id': 'ppo_LargeCNN',
             
    #          'nbr_actor': 8,
    #          'nbr_frame_skipping': 4,
    #          'nbr_frame_stacking': 4,
    #          'grayscale': True,
    #          'single_life_episode': True,
    #          'nbr_max_random_steps': 30,
    #          'clip_reward': True,
    #          'observation_resize_dim': 84
    #          },

            # {'env-id': 'FreewayNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '3step_fastprioritized_noisy_dueling_double_dqn_LargeCNN_r1e5',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'PongNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'PongNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '3step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_noisy_double_dqn_LargeCNN_r1e5',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '3step_noisy_double_dqn_LargeCNN_r1e5',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '5step_noisy_double_dqn_LargeCNN_r1e5',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '3step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s1_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '5step_noisy_fastprioritized_double_dueling_dqn_LargeCNN_r1e5_B4m1_tau1m3',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'RobotankNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_s100_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_noisy_double_dueling_dqn_LargeCNN_r1e5',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # Active task: the only entry of `tasks` that is not commented out.
            {env-id: 'PongNoFrameskip-v4',  # 'PongNoFrameskip-v4'

             run-id: 'Seed1_venv_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30_SingleLife_ClipReward_Eps3p4End1m2',

             # The active agent-id matches a key of the top-level `agents:`
             # mapping; the two commented ids are disabled alternatives.
             # agent-id: '1step_double_dqn_LargeCNN_r1e5_tau1m4',
             # agent-id: '1step_double_dqn_LargeCNN_r1e5_tau1m4_targetAsMain',
             agent-id: '1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_beta6m1_tau1m4',

             # Environment wrapping options.
             nbr_actor: 1,
             nbr_frame_stacking: 4,
             nbr_frame_skipping: 4,
             nbr_max_random_steps: 30,
             observation_resize_dim: 84,
             grayscale: true,
             clip_reward: true,
             single_life_episode: true
             },
            
            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_double_dqn_LargeCNN_r1e5_tau1m4',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_dueling_double_dqn_LargeCNN_r1e5_tau1m4',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'DoubleDunkNoFrameskip-v4', #'PongNoFrameskip-v4',

            #  'run-id': 'test_epsgreedytest_s101_WLoss_MaxInitPER_EPS4m1PER', #'Seed1_venv_greedy_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': '1step_fastprioritized_double_dqn_LargeCNN_r1e5_tau1m4',
             
            #  'nbr_actor': 1,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },

            # {'env-id': 'BreakoutNoFrameskip-v4',

            #  'run-id': 'Seed13_penv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30',
            #  'agent-id': 'ppo_LargeCNN',
             
            #  'nbr_actor': 8,
            #  'nbr_frame_skipping': 4,
            #  'nbr_frame_stacking': 4,
            #  'grayscale': True,
            #  'single_life_episode': True,
            #  'nbr_max_random_steps': 30,
            #  'clip_reward': True,
            #  'observation_resize_dim': 84
            #  },
            ]
    # Run-level settings of the experiment.
    # NOTE(review): `1.0e4`, `1.0e8` and `3.0e5` carry no sign in the
    # exponent; YAML 1.1 loaders such as PyYAML read them as strings while
    # YAML 1.2 loaders read floats -- confirm the consumer converts them.
    experiment_id: 'pong_300k_benchmark_dqn'
    seed: 1
    train_observation_budget: 3.0e5  # 2.0e6 # 1.0e7
    benchmarking_episodes: 1
    benchmarking_interval: 1.0e4
    benchmarking_record_episode_interval: 1.0e8

agents:    
    # Base configuration with `double` switched on and a 1e5 replay
    # capacity; `dueling` and `noisy` are not overridden.
    1step_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        double: true
        # dueling: true
        # noisy: true
        replay_capacity: 1e5

    # Same overrides as the 1-step double DQN entry, plus
    # `use_target_to_gather_data` switched on.
    1step_double_dqn_LargeCNN_r1e5_targetAsMain:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        double: true
        # dueling: true
        # noisy: true
        use_target_to_gather_data: true
        replay_capacity: 1e5

    # 1-step double DQN overrides with `tau` lowered from the base
    # 1.0e-2 to 1.0e-4.
    1step_double_dqn_LargeCNN_r1e5_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        # dueling: true
        # noisy: true
        replay_capacity: 1e5

    # 1-step double DQN overrides with `tau` at 1.0e-4 and
    # `use_target_to_gather_data` switched on.
    1step_double_dqn_LargeCNN_r1e5_tau1m4_targetAsMain:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        use_target_to_gather_data: true
        n_step: 1
        double: true
        # dueling: true
        # noisy: true
        replay_capacity: 1e5

    # Base configuration with both `double` and `dueling` switched on;
    # `noisy` is not overridden.
    1step_dueling_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        dueling: true
        double: true
        # noisy: true
        replay_capacity: 1e5

    # `double` and `dueling` switched on, with `tau` lowered to 1.0e-4;
    # `noisy` is not overridden.
    1step_dueling_double_dqn_LargeCNN_r1e5_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        dueling: true
        double: true
        # noisy: true
        replay_capacity: 1e5

    # Double DQN with prioritized replay (PER_beta 0.5) and tau 1.0e-4;
    # `dueling` and `noisy` are not overridden.
    1step_fastprioritized_double_dqn_LargeCNN_r1e5_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        # dueling: true
        # noisy: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.5
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # Double DQN with prioritized replay (PER_beta 0.4), a 5e5 replay
    # capacity and tau 1.0e-4; `dueling` and `noisy` are not overridden.
    1step_fastprioritized_double_dqn_LargeCNN_r5e5_beta4m1_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        double: true
        # dueling: true
        # noisy: true
        use_target_to_gather_data: false

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 5e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # Noisy double DQN with prioritized replay (PER_beta 0.6) and
    # tau 1.0e-4; `dueling` is not overridden. This is the agent-id
    # selected by the active task of the experiment.
    # NOTE(review): the key says `r1e5` but replay_capacity is 3e5 --
    # confirm which one is intended. Both are left as they are because
    # the key is referenced by the active task.
    1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_beta6m1_tau1m4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-4
        n_step: 1
        noisy: true
        double: true
        # dueling: true
        use_target_to_gather_data: false

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.6
        replay_capacity: 3e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2
    
    # Base configuration with `double` and `noisy` switched on;
    # `dueling` is not overridden.
    1step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        noisy: true
        double: true
        # dueling: true
        replay_capacity: 1e5

    # Base configuration with `double`, `dueling` and `noisy` all
    # switched on.
    1step_noisy_double_dueling_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        noisy: true
        dueling: true
        double: true
        replay_capacity: 1e5

    # Noisy double DQN with n_step raised to 3; `dueling` is not
    # overridden.
    3step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 3
        noisy: true
        double: true
        # dueling: true
        replay_capacity: 1e5

    # Noisy double DQN with n_step raised to 5; `dueling` is not
    # overridden.
    5step_noisy_double_dqn_LargeCNN_r1e5:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 5
        noisy: true
        double: true
        # dueling: true
        replay_capacity: 1e5

    # Double DQN with prioritized replay (PER_beta 0.5); `dueling` and
    # `noisy` are not overridden, and `tau` keeps the base value.
    #
    # This entry used to be keyed
    # `1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1`, a key
    # that is defined a second time in this mapping. Duplicate keys are
    # invalid YAML; loaders that tolerate them keep only the last
    # definition, so this entry could never be selected. It is renamed
    # after what it actually sets (noisy off, PER_beta 0.5). The original
    # id keeps resolving to the other definition, exactly as before.
    1step_fastprioritized_double_dqn_LargeCNN_r1e5_B5m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        double: true
        # dueling: true
        # noisy: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.5
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # Noisy double DQN with prioritized replay (PER_beta 0.4);
    # `dueling` is not overridden and `tau` keeps the base value.
    1step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 1
        noisy: true
        double: true
        # dueling: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # Noisy double DQN with prioritized replay (PER_beta 0.4) and
    # n_step 3; `dueling` is not overridden.
    3step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 3
        noisy: true
        double: true
        # dueling: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # Noisy double DQN with prioritized replay (PER_beta 0.4) and
    # n_step 5; `dueling` is not overridden.
    5step_noisy_fastprioritized_double_dqn_LargeCNN_r1e5_B4m1:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        n_step: 5
        noisy: true
        double: true
        # dueling: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

    # `double`, `dueling` and `noisy` all switched on, with prioritized
    # replay (PER_beta 0.4), n_step 5 and tau 1.0e-3.
    5step_noisy_fastprioritized_double_dueling_dqn_LargeCNN_r1e5_B4m1_tau1m3:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        tau: 1.0e-3
        n_step: 5
        noisy: true
        dueling: true
        double: true

        # Prioritized replay.
        use_PER: true
        PER_beta: 0.4
        replay_capacity: 1e5

        # Author's note on the two values below: the paper uses
        # batch_size 32 with replay_period 4 (ratio 8). Because the GPU
        # batch size is the bottleneck, batch_size 16 gave a better
        # batch-regularization-effect / speed trade-off on an NVIDIA
        # 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec with
        # updates, whereas 32 / 4 yielded ~25 it/sec.
        batch_size: 16
        replay_period: 2

        
        