# Extra hyperparameters, shared through the `extra_hyperparameters` anchor and
# merged into the base DQN configuration further down in this file.
extra_hyperparameters: &extra_hyperparameters
  # NOTE(review): presumably toggles scaling the learning rate by the number
  # of actors -- confirm against the code that consumes this file.
  lr_account_for_nbr_actor: false
  weights_decay_lambda: 1.0

# Network architecture preset, shared through the `LargeCNN` anchor.
LargeCNN: &LargeCNN
  # Architecture selectors. 'None' is the quoted string, not a YAML null.
  phi_arch: 'CNN'
  actor_arch: 'None'
  critic_arch: 'None'

  # Phi body: the four per-layer lists below hold three entries each.
  phi_arch_channels: [32, 64, 64]
  phi_arch_kernels: [8, 4, 3]
  phi_arch_strides: [4, 2, 1]
  phi_arch_paddings: [1, 1, 1]
  phi_arch_feature_dim: 512
  phi_arch_hidden_units: [512]

  # Actor and critic heads: no hidden units are configured.
  actor_arch_hidden_units: []
  critic_arch_hidden_units: []

        
# Base DQN agent configuration, reused through its anchor. The anchor name
# appears to encode the key settings below: obs84 (observation_resize_dim),
# graclip5m1 (gradient_clip 0.5), b32 (batch_size), tau1m2 (tau 1e-2) and
# lr25m5 (learning_rate 25e-5 = 2.5e-4).
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
  # One merge key carrying a list of aliases: a mapping must not repeat the
  # `<<` key (duplicate keys are invalid and rejected or collapsed by strict
  # loaders). The two merged mappings share no keys, so their order does not
  # affect the result.
  <<: [*LargeCNN, *extra_hyperparameters]

  double: false
  dueling: false

  # Floats are written with a decimal point and a signed exponent so that
  # YAML 1.1 loaders (e.g. PyYAML) resolve them as numbers, not as strings.
  replay_capacity: 1.0e+6
  min_capacity: 1.0e+3
  observation_resize_dim: 84
  discount: 0.99
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 32
  tau: 1.0e-2
  learning_rate: 2.5e-4
  adam_eps: 1.0e-8

  # PER_* settings sit next to the use_PER switch, which is off here.
  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # NOTE(review): presumably the epsilon-greedy schedule (start value, end
  # value, decay horizon) -- confirm units of `epsdecay` with the consumer.
  epsstart: 1.0
  epsend: 0.01
  epsdecay: 30000


# Experiment definition: the list of tasks to run plus benchmarking and
# training-budget settings.
experiment:
  # One entry per environment. The three tasks differ only in `env-id`; every
  # other field is repeated verbatim.
  tasks:
    - env-id: 'QbertNoFrameskip-v4'
      run-id: 'Seed10_penv_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
      # Matches the agent key defined under the top-level `agents` mapping.
      agent-id: 'doubledqn_LargeCNN_r1e4'
      nbr_actor: 1
      nbr_frame_skipping: 4
      nbr_frame_stacking: 4
      grayscale: true
      single_life_episode: true
      nbr_max_random_steps: 30
      clip_reward: true
      observation_resize_dim: 84

    - env-id: 'PongNoFrameskip-v4'
      run-id: 'Seed10_penv_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
      agent-id: 'doubledqn_LargeCNN_r1e4'
      nbr_actor: 1
      nbr_frame_skipping: 4
      nbr_frame_stacking: 4
      grayscale: true
      single_life_episode: true
      nbr_max_random_steps: 30
      clip_reward: true
      observation_resize_dim: 84

    - env-id: 'BreakoutNoFrameskip-v4'
      run-id: 'Seed10_penv_dqn_Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
      agent-id: 'doubledqn_LargeCNN_r1e4'
      nbr_actor: 1
      nbr_frame_skipping: 4
      nbr_frame_stacking: 4
      grayscale: true
      single_life_episode: true
      nbr_max_random_steps: 30
      clip_reward: true
      observation_resize_dim: 84

  # NOTE(review): the id mentions "10M" while train_observation_budget below
  # is 1.0e+6 -- confirm which of the two is intended.
  experiment_id: 'atari_10M_benchmark_dqn'
  benchmarking_episodes: 1
  # Signed exponents so that YAML 1.1 loaders (e.g. PyYAML) resolve these two
  # values as floats; with an unsigned exponent they are read as strings.
  benchmarking_interval: 1.0e+4
  benchmarking_record_episode_interval: 0
  train_observation_budget: 1.0e+6
  seed: 10

# Agent definitions, keyed by the id that tasks reference through `agent-id`.
agents:
  # Base DQN configuration with `replay_capacity` and `double` overridden.
  # Keys written explicitly here take precedence over the merged-in values.
  doubledqn_LargeCNN_r1e4:
    <<: *dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
    # Decimal point and signed exponent so that YAML 1.1 loaders (e.g. PyYAML)
    # resolve the value as a float instead of the string '1e4'.
    replay_capacity: 1.0e+4
    double: true