# Shared settings, exposed through the `extra_hyperparameters` anchor so that
# agent configurations can merge them in.
extra_hyperparameters: &extra_hyperparameters
    lr_account_for_nbr_actor: false
    weights_decay_lambda: 1.0

# Network architecture preset, exposed through the `LargeCNN` anchor.
# Note: 'None' below is the quoted string, not a YAML null.
LargeCNN: &LargeCNN
    phi_arch: 'CNN'
    actor_arch: 'None'
    critic_arch: 'None'

    # Phi body. The four lists each hold three entries
    # (presumably one per convolutional layer -- confirm against the consumer).
    phi_arch_channels: [32, 64, 64]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512
    phi_arch_hidden_units: [512]

    # Actor head: no hidden units.
    actor_arch_hidden_units: []
    # Critic head: no hidden units.
    critic_arch_hidden_units: []

        
# Baseline DQN hyperparameters. The anchor name appears to encode the values
# set below: obs84 (observation_resize_dim), graclip5m1 (gradient_clip 0.5),
# b32 (batch_size), tau1m2 (tau 1e-2) and lr25m5 (learning_rate 2.5e-4).
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
    # Merge the shared extras and the network architecture through a single
    # merge key: a mapping must not repeat `<<`, and strict loaders reject the
    # duplicate. The two merged mappings share no keys, so the list order does
    # not affect the result; keys written explicitly below override merged ones.
    <<: [*extra_hyperparameters, *LargeCNN]

    # DQN variants (both disabled here).
    double: false
    dueling: false

    # Replay storage sizes. The mantissa has a decimal point and the exponent
    # an explicit sign so that YAML 1.1 loaders (e.g. PyYAML) resolve these as
    # floats rather than strings.
    replay_capacity: 1.0e+6
    min_capacity: 1.0e+3

    observation_resize_dim: 84
    discount: 0.99
    use_cuda: true
    gradient_clip: 0.5
    batch_size: 32
    tau: 1.0e-2
    learning_rate: 2.5e-4
    adam_eps: 1.0e-8

    # PER settings; the use_PER switch is off in this preset.
    use_PER: false
    PER_alpha: 0.6
    PER_beta: 1.0

    # Epsilon start / end / decay values
    # (presumably the exploration schedule -- confirm against the consumer).
    epsstart: 1.0
    epsend: 0.01
    epsdecay: 30000


# Benchmark run definition: the list of tasks plus run-wide settings.
experiment:
    # Each task's 'agent-id' names an entry under the top-level `agents` key.
    tasks:
        # Disabled task, kept for reference. Its agent-id ('ppo_LargeCNN') is
        # not among the agents visible in this file.
        # - env-id: 'QbertNoFrameskip-v4'
        #   run-id: 'Seed13_venv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
        #   agent-id: 'ppo_LargeCNN'
        #   nbr_actor: 8
        #   nbr_frame_skipping: 4
        #   nbr_frame_stacking: 4
        #   grayscale: true
        #   single_life_episode: true
        #   nbr_max_random_steps: 30
        #   clip_reward: true
        #   observation_resize_dim: 84

        # Active task.
        - env-id: 'PongNoFrameskip-v4'
          run-id: 'Seed0_penv_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
          agent-id: 'dqn_LargeCNN_r1e4'
          nbr_actor: 1
          nbr_frame_skipping: 4
          nbr_frame_stacking: 4
          grayscale: true
          single_life_episode: true
          nbr_max_random_steps: 30
          clip_reward: true
          observation_resize_dim: 84

        # Disabled task, kept for reference. Its agent-id ('ppo_LargeCNN') is
        # not among the agents visible in this file.
        # - env-id: 'BreakoutNoFrameskip-v4'
        #   run-id: 'Seed13_penv_ppo_8actors_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
        #   agent-id: 'ppo_LargeCNN'
        #   nbr_actor: 8
        #   nbr_frame_skipping: 4
        #   nbr_frame_stacking: 4
        #   grayscale: true
        #   single_life_episode: true
        #   nbr_max_random_steps: 30
        #   clip_reward: true
        #   observation_resize_dim: 84

    experiment_id: 'atari_10M_benchmark_dqn'
    benchmarking_episodes: 1
    # Exponents carry an explicit sign so that YAML 1.1 loaders (e.g. PyYAML)
    # resolve these values as floats rather than strings.
    benchmarking_interval: 1.0e+4
    benchmarking_record_episode_interval: 1.0e+5
    train_observation_budget: 1.0e+7
    seed: 0

# Agent configurations, keyed by the identifier that tasks reference through
# their 'agent-id' field.
agents:
    # Baseline DQN preset with a smaller replay buffer; the explicit key
    # overrides the value merged in from the anchor.
    dqn_LargeCNN_r1e4:
        <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
        # Decimal point and signed exponent so that YAML 1.1 loaders
        # (e.g. PyYAML) resolve this as a float rather than a string.
        replay_capacity: 1.0e+4