# Shared defaults that the algorithm templates in this file pull in through
# `<<: *extra_hyperparameters`.
extra_hyperparameters: &extra_hyperparameters
    standardized_adv: False
    lr_account_for_nbr_actor: False

    # BetaVAE:
    # NOTE: floats in scientific notation are written with a decimal point and
    # a signed exponent (e.g. 1.0e+2). YAML 1.1 loaders such as PyYAML resolve
    # forms like `1e2` or `1.e5` as strings, not floats.
    use_vae: False
    vae_weight: 1.0e+0
    vae_nbr_latent_dim: 128
    # A space is required before '#': without it (`3#4`) the whole token is
    # parsed as the string "3#4" instead of the integer 3.
    vae_decoder_nbr_layer: 3 # alternative: 4
    vae_decoder_conv_dim: 128

    cnn_encoder_feature_dim: 128 # same value as vae_nbr_latent_dim

    vae_beta: 1.0e+2
    vae_max_capacity: 1.0e+2
    vae_nbr_epoch_till_max_capacity: 20
    vae_constrainedEncoding: False
    vae_tc_discriminator_hidden_units: [256,256,256,256,2] # i.e. [2*cnn_encoder_feature_dim]*4 + [2]

    # Random Network Distillation:
    use_random_network_distillation: False
    intrinsic_discount: 0.99
    rnd_loss_int_ratio: 0.5
    rnd_obs_clip: 5
    rnd_non_episodic_int_r: True
    rnd_update_period_running_meanstd_int_reward: 1.0e+5
    rnd_update_period_running_meanstd_obs: 1.0e+5 # same value as rnd_update_period_running_meanstd_int_reward
    # RND Convolutional Architecture:
    rnd_arch: 'CNN'
    rnd_arch_channels: [32, 64, 64]
    rnd_arch_kernels: [8, 4, 3]
    rnd_arch_strides: [4, 2, 1]
    rnd_arch_paddings: [0, 1, 1]
    rnd_arch_feature_dim: 512
    # RND Fully-Connected Architecture:
    #rnd_feature_net_fc_arch_hidden_units: (128, 64)

# PPO template with phi_arch 'CNN' (3 conv layers) and 'MLP' actor/critic.
# Agents under `agents:` merge this template and override individual keys.
ppo_NatureCNN_graclip05_b32_h512_lr14_obs84: &ppo_NatureCNN_graclip05_b32_h512_lr14_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: true
    use_cuda: true
    gae_tau: 0.95
    value_weight: 1.0
    entropy_weight: 0.02
    gradient_clip: 0.5
    optimization_epochs: 10
    mini_batch_size: 32
    ppo_ratio_clip: 0.2
    learning_rate: 1.0e-4
    adam_eps: 1.0e-8
    # nbr_actor: 4   (disabled here)
    horizon: 512

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "MLP"
    critic_arch: "MLP"

    # Phi body (one list entry per convolutional layer):
    phi_arch_channels: [32, 64, 64]   # alternative: [32, 64, 32]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512         # alternative: vae_nbr_latent_dim
    phi_arch_hidden_units: [512]

# PPO template with phi_arch 'CNN' plus explicit hidden-unit lists for the
# 'MLP' actor and critic.
ppo_baselineCNN_graclip05_b32_h512_lr14_obs84: &ppo_baselineCNN_graclip05_b32_h512_lr14_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: true
    use_cuda: true
    gae_tau: 0.95
    value_weight: 1.0
    entropy_weight: 0.02
    gradient_clip: 0.5
    optimization_epochs: 10
    mini_batch_size: 32
    ppo_ratio_clip: 0.2
    learning_rate: 1.0e-4
    adam_eps: 1.0e-8
    # nbr_actor: 4   (disabled here)
    horizon: 512

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "MLP"
    critic_arch: "MLP"

    # Phi body (one list entry per convolutional layer):
    phi_arch_channels: [32, 64, 64]   # alternative: [32, 64, 32]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512         # alternative: vae_nbr_latent_dim
    phi_arch_hidden_units: [512]

    # Actor head:
    actor_arch_hidden_units: [512]
    # Critic head:
    critic_arch_hidden_units: [512]


# PPO template with phi_arch 'CNN' and the actor/critic architectures set to
# the string 'None' with empty hidden-unit lists.
# NOTE(review): the template name says b32 / h512 / lr14, but the values below
# are mini_batch_size 256, horizon 128 and learning_rate 2.5e-4. The name is
# kept because it is referenced as an alias elsewhere in this file.
ppo_LargeCNN_graclip05_b32_h512_lr14_obs84: &ppo_LargeCNN_graclip05_b32_h512_lr14_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: true
    use_cuda: true
    gae_tau: 0.95
    value_weight: 1.0
    entropy_weight: 0.01
    gradient_clip: 0.5
    optimization_epochs: 3
    mini_batch_size: 256
    ppo_ratio_clip: 0.1
    learning_rate: 2.5e-4
    adam_eps: 1.0e-8
    # nbr_actor: 4   (disabled here)
    horizon: 128

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    # These are the literal string "None", not YAML null.
    actor_arch: "None"
    critic_arch: "None"

    # Phi body (one list entry per convolutional layer):
    phi_arch_channels: [32, 64, 64]   # alternative: [32, 64, 32]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512         # alternative: vae_nbr_latent_dim
    phi_arch_hidden_units: [512]

    # Actor head: no hidden layers configured.
    actor_arch_hidden_units: []
    # Critic head: no hidden layers configured.
    critic_arch_hidden_units: []


# PPO template where phi_arch is "None" and the actor and critic are each
# configured as a 'CNN' with their own convolutional settings.
ppo_baselineCNNActorCritic_graclip05_b32_h512_lr14_obs84: &ppo_baselineCNNActorCritic_graclip05_b32_h512_lr14_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: true
    use_cuda: true
    gae_tau: 0.95
    value_weight: 1.0
    entropy_weight: 0.02
    gradient_clip: 0.5
    optimization_epochs: 10
    mini_batch_size: 32
    ppo_ratio_clip: 0.2
    learning_rate: 1.0e-4
    adam_eps: 1.0e-8
    # nbr_actor: 4   (disabled here)
    horizon: 512

    # --- Network layout ---
    # A bare `None` in YAML is the string "None" (YAML null is `null` or `~`).
    # It is quoted here to make that explicit and to match the quoted 'None'
    # used for actor_arch / critic_arch in the LargeCNN template.
    phi_arch: "None"     # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "CNN"
    critic_arch: "CNN"

    # Phi body settings (still listed although phi_arch is "None"):
    phi_arch_channels: [32, 64, 64]   # alternative: [32, 64, 32]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512         # alternative: vae_nbr_latent_dim
    phi_arch_hidden_units: [512]

    # Actor head:
    actor_arch_hidden_units: [512]
    # Actor CNN (one list entry per convolutional layer):
    actor_arch_channels: [32, 64, 64]   # alternative: [32, 64, 32]
    actor_arch_kernels: [8, 4, 3]
    actor_arch_strides: [4, 2, 1]
    actor_arch_paddings: [1, 1, 1]
    actor_arch_feature_dim: 512         # alternative: vae_nbr_latent_dim

    # Critic head:
    critic_arch_hidden_units: [512]
    # Critic CNN (one list entry per convolutional layer):
    critic_arch_channels: [32, 64, 64]  # alternative: [32, 64, 32]
    critic_arch_kernels: [8, 4, 3]
    critic_arch_strides: [4, 2, 1]
    critic_arch_paddings: [1, 1, 1]
    critic_arch_feature_dim: 512        # alternative: vae_nbr_latent_dim


# PPO template with a two-layer convolutional phi body and 'MLP' actor/critic.
ppo_DQNCNN_graclip05_ep3_b32_h128_lr254_obs84: &ppo_DQNCNN_graclip05_ep3_b32_h128_lr254_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: true
    use_cuda: true
    gae_tau: 0.95
    value_weight: 1.0
    entropy_weight: 0.01
    gradient_clip: 0.5
    optimization_epochs: 3
    mini_batch_size: 32
    ppo_ratio_clip: 0.2
    learning_rate: 2.5e-4
    adam_eps: 1.0e-8
    # nbr_actor: 4   (disabled here)
    horizon: 128

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "MLP"
    critic_arch: "MLP"

    # Phi body: two convolutional layers (one list entry per layer).
    phi_arch_channels: [16, 32]
    phi_arch_kernels: [8, 4]
    phi_arch_strides: [4, 2]
    phi_arch_paddings: [1, 1]
    phi_arch_feature_dim: 256         # alternatives: vae_nbr_latent_dim, 512
    phi_arch_hidden_units: [256]

    # Actor head:
    actor_arch_hidden_units: [512]
    # Actor CNN settings (disabled; actor_arch is 'MLP'):
    # actor_arch_channels: [32, 64, 64] # [32, 64, 32]
    # actor_arch_kernels: [8, 4, 3]
    # actor_arch_strides: [4, 2, 1]
    # actor_arch_paddings: [1, 1, 1]
    # actor_arch_feature_dim: 512

    # Critic head:
    critic_arch_hidden_units: [512]
    # Critic CNN settings (disabled; critic_arch is 'MLP'):
    # critic_arch_channels: [32, 64, 64] # [32, 64, 32]
    # critic_arch_kernels: [8, 4, 3]
    # critic_arch_strides: [4, 2, 1]
    # critic_arch_paddings: [1, 1, 1]
    # critic_arch_feature_dim: 512


# A2C template (use_gae off) with a two-layer convolutional phi body and
# 'MLP' actor/critic.
a2c_DQNCNN_NOGAE_NOStdAdv_graclip05_ep1_b32_h20_lr7e4_obs84: &a2c_DQNCNN_NOGAE_NOStdAdv_graclip05_ep1_b32_h20_lr7e4_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: false
    use_cuda: true
    gae_tau: 0.95
    entropy_weight: 0.1        # LEGACY PARAMETER
    gradient_clip: 0.5
    optimization_epochs: 1     # LEGACY PARAMETER
    mini_batch_size: 32
    learning_rate: 7.0e-4
    optimizer_eps: 1.0e-5
    optimizer_alpha: 0.99
    # nbr_actor: 16   (disabled here)
    horizon: 20                # DEFAULT: 5

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "MLP"
    critic_arch: "MLP"

    # Phi body: two convolutional layers (one list entry per layer).
    phi_arch_channels: [16, 32]
    phi_arch_kernels: [8, 4]
    phi_arch_strides: [4, 2]
    phi_arch_paddings: [1, 1]
    phi_arch_feature_dim: 256         # alternatives: vae_nbr_latent_dim, 512
    phi_arch_hidden_units: [256]

    # Actor head:
    actor_arch_hidden_units: [512]
    # Actor CNN settings (disabled; actor_arch is 'MLP'):
    # actor_arch_channels: [32, 64, 64] # [32, 64, 32]
    # actor_arch_kernels: [8, 4, 3]
    # actor_arch_strides: [4, 2, 1]
    # actor_arch_paddings: [1, 1, 1]
    # actor_arch_feature_dim: 512

    # Critic head:
    critic_arch_hidden_units: [512]
    # Critic CNN settings (disabled; critic_arch is 'MLP'):
    # critic_arch_channels: [32, 64, 64] # [32, 64, 32]
    # critic_arch_kernels: [8, 4, 3]
    # critic_arch_strides: [4, 2, 1]
    # critic_arch_paddings: [1, 1, 1]
    # critic_arch_feature_dim: 512

# A2C template (use_gae off) with gradient_clip 3.0, horizon 5, a two-layer
# convolutional phi body and 256-unit 'MLP' actor/critic heads.
a2c_NIPS_graclip3e0_ep1_b32_h5_lr7e4XNbrActor_obs84: &a2c_NIPS_graclip3e0_ep1_b32_h5_lr7e4XNbrActor_obs84
    # Shared VAE / RND defaults; any key set explicitly below takes precedence
    # over the merged value.
    <<: *extra_hyperparameters

    # --- Observation handling ---
    nbr_frame_stacking: 1
    observation_resize_dim: 84

    # --- Optimisation settings ---
    discount: 0.99
    use_gae: false
    use_cuda: true
    gae_tau: 0.95
    entropy_weight: 0.02
    gradient_clip: 3.0
    optimization_epochs: 1     # LEGACY PARAMETER
    mini_batch_size: 32
    learning_rate: 7.0e-4
    optimizer_eps: 1.0e-1
    optimizer_alpha: 0.99
    # nbr_actor: 16   (disabled here)
    horizon: 5                 # DEFAULT: 5

    # --- Network layout ---
    phi_arch: "CNN"      # other values seen in this file's comments: 'MLP', 'CNN-GRU-RNN'
    actor_arch: "MLP"
    critic_arch: "MLP"

    # Phi body: two convolutional layers (one list entry per layer).
    phi_arch_channels: [16, 32]
    phi_arch_kernels: [8, 4]
    phi_arch_strides: [4, 2]
    phi_arch_paddings: [1, 1]
    phi_arch_feature_dim: 256         # alternatives: vae_nbr_latent_dim, 512
    phi_arch_hidden_units: [256]

    # Actor head:
    actor_arch_hidden_units: [256]
    # Actor CNN settings (disabled; actor_arch is 'MLP'):
    # actor_arch_channels: [32, 64, 64] # [32, 64, 32]
    # actor_arch_kernels: [8, 4, 3]
    # actor_arch_strides: [4, 2, 1]
    # actor_arch_paddings: [1, 1, 1]
    # actor_arch_feature_dim: 512

    # Critic head:
    critic_arch_hidden_units: [256]
    # Critic CNN settings (disabled; critic_arch is 'MLP'):
    # critic_arch_channels: [32, 64, 64] # [32, 64, 32]
    # critic_arch_kernels: [8, 4, 3]
    # critic_arch_strides: [4, 2, 1]
    # critic_arch_paddings: [1, 1, 1]
    # critic_arch_feature_dim: 512


# Experiment definition: the list of tasks to run plus global run settings.
experiment:
    # One mapping per task. Only the first (Qbert) task is enabled; the others
    # are kept below as commented-out examples.
    tasks:
        - env-id: 'QbertNoFrameskip-v4'
          # Other env-ids used previously:
          #   'BreakoutDeterministic-v4'
          #   'PongDeterministic-v4'
          #   'BreakoutNoFrameskip-v4'
          #   'SeaquestNoFrameskip-v4'
          #   'BoxingNoFrameskip-v4'

          # NOTE(review): the run-id mentions "8actors" while nbr_actor below is 1 — confirm which is intended.
          run-id: 'Multinomial+SingLifeClipReward_EPS30_Seed131_penv_ppo_8actors_RLLOOPRESET+BaselineWrap+Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
          # Other run-ids used previously:
          #   'SingLife_EPS30_Seed131_penv_ppo_8actors_RLLOOPRESET+BaselineWrap+Max+Sk4_St4_Obs84_Grayscale_RandNoOpStart30'
          #   'regidx_bootstrapped_a2c_DQNCNN_8actors_Sk4_St4_Obs84'
          #   'vecenv_a2c_16actors_Sk1_St4_Obs84_Grayscale'
          #   'penv_w4_a2c_32actors_Sk4_St4_Obs84_Grayscale_RandStart'
          #   'vecenv_a2c_16actors_Sk4_St4_Obs84_Grayscale'
          #   'NoNormAdvReLUFFC_penv_a2c_16actors_Sk1_St4_Obs84_Grayscale_RandNoOpStart30'

          # The selected agent-id matches a key defined under `agents:` in this file.
          agent-id: 'ppo_LargeCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b256_gamma0.99_lr2.5e-4_ent0.01'
          # Other agent-ids used previously:
          #   'ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b32_gamma0.99_lr2.5e-4XNbrActor_ent0.0'
          #   'ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b256_gamma0.99_lr2.5e-4_ent0.01'
          #   'ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b32_gamma0.99_lr2.5e-4_ent0.01'
          #   'a2c_DQNCNN_NOGAE_NOStdAdv_graclip05_ep1_b32_h20_lr7e4_obs84_ent0.1'
          #   'a2c_NIPS_NOGAE_NOStdAdv_graclip3.0_ep1_bNone_h20_lr7e4XNbrActor_obs84_ent0.1'
          #   'a2c_NIPS_NOGAE_NOStdAdv_graclip40.0_eps0.1_alpha0.99_ep1_bNone_h5_lr7e5XNbrActor_obs84_ent0.01'

          nbr_actor: 1
          nbr_frame_skipping: 4
          act_rand_repeat: False
          nbr_frame_stacking: 4
          grayscale: True
          single_life_episode: True
          nbr_max_random_steps: 30
          clip_reward: True
          observation_resize_dim: 84

        # Disabled example tasks (note they use the key `nbr_actors`, whereas
        # the enabled task above uses `nbr_actor`):
        # - env-id: 'MiniWorld-OneRoomS6Fast-v0'
        #   agent-id: 'ppo_scnn_lr3_obs84_initXavier'
        #   nbr_actors: 32
        #   nbr_frame_skipping: 4
        #   nbr_frame_stacking: 4
        #   observation_resize_dim: 84
        # - env-id: 'SeaquestNoFrameskip-v4'
        #   agent-id: 'ppo'
        #   nbr_actors: 32
        #   nbr_frame_skipping: 4
        #   nbr_frame_stacking: 4
        #   observation_resize_dim: 64
        # - env-id: 'Pong-v0'
        #   agent-id: 'ppo'
        #   nbr_actors: 32
        #   nbr_frame_skipping: 4
        #   nbr_frame_stacking: 4
        #   observation_resize_dim: 64
    experiment_id: 'benchmark-test'
    benchmarking_episodes: 10
    # Written with a signed exponent: YAML 1.1 loaders such as PyYAML resolve
    # `1.0e7` as the string "1.0e7", whereas `1.0e+7` is resolved as a float.
    train_observation_budget: 1.0e+7
    seed: 131

# Concrete agent configurations. Each entry merges one of the templates
# defined above and overrides individual keys; explicitly listed keys take
# precedence over merged ones. Keys within this mapping must be unique.
agents:
    ppo_distlogit_outloopOldPred_paper_GAE_ep10_clip0.2_gradclip0.5_b256_lr2.5e-4_ent0.01_16actors_NoNormR_Sk1:
        <<: *ppo_DQNCNN_graclip05_ep3_b32_h128_lr254_obs84
        # NOTE(review): spelled `nbr_actors` here, while the enabled task under
        # `experiment.tasks` uses `nbr_actor` — confirm which key the consumer reads.
        nbr_actors: 16
        ppo_ratio_clip: 0.2
        entropy_weight: 0.01
        use_gae: True
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 256
        optimization_epochs: 10
        gradient_clip: 0.5

    # This key was previously defined twice with identical contents; the
    # duplicate has been removed because duplicate mapping keys are invalid
    # YAML and are rejected by strict loaders.
    ppo_NatureCNN_GAE_ep3_clip0.1_gradclip0.5_b256_lr2.5e-4_ent0.01:
        <<: *ppo_NatureCNN_graclip05_b32_h512_lr14_obs84
        ppo_ratio_clip: 0.1
        entropy_weight: 0.01
        use_gae: True
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 256
        optimization_epochs: 3
        gradient_clip: 0.5

    # Same as the entry above except that use_gae is False.
    ppo_NatureCNN_NOGAE_ep3_clip0.1_gradclip0.5_b256_lr2.5e-4_ent0.01:
        <<: *ppo_NatureCNN_graclip05_b32_h512_lr14_obs84
        ppo_ratio_clip: 0.1
        entropy_weight: 0.01
        use_gae: False
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 256
        optimization_epochs: 3
        gradient_clip: 0.5

    # Earlier names for the next entry:
    #ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b32_gamma0.99_lr2.5e-4XNbrActor_ent0.0:
    #ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b256_gamma0.99_lr2.5e-4_ent0.0:
    ppo_baselineCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b256_gamma0.99_lr2.5e-4_ent0.01:
        <<: *ppo_baselineCNN_graclip05_b32_h512_lr14_obs84
        standardized_adv: True
        lr_account_for_nbr_actor: False
        ppo_ratio_clip: 0.1
        entropy_weight: 0.01
        use_gae: True
        gae_tau: 0.95
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 256
        optimization_epochs: 3
        gradient_clip: 0.5

    # This is the agent currently selected by the enabled task's 'agent-id'.
    ppo_LargeCNN_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b256_gamma0.99_lr2.5e-4_ent0.01:
        <<: *ppo_LargeCNN_graclip05_b32_h512_lr14_obs84
        standardized_adv: True
        lr_account_for_nbr_actor: False
        ppo_ratio_clip: 0.1
        entropy_weight: 0.01
        use_gae: True
        gae_tau: 0.95
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 256
        optimization_epochs: 3
        gradient_clip: 0.5

    ppo_baselineCNNActorCritic_GAE0.95_ep3_clip0.1_gradclip0.5_h128_b32_gamma0.99_lr2.5e-4_ent0.01:
        <<: *ppo_baselineCNNActorCritic_graclip05_b32_h512_lr14_obs84
        ppo_ratio_clip: 0.1
        entropy_weight: 0.01
        use_gae: True
        gae_tau: 0.95
        horizon: 128
        learning_rate: 2.5e-4
        mini_batch_size: 32
        optimization_epochs: 3
        gradient_clip: 0.5

    a2c_DQNCNN_NOGAE_NOStdAdv_graclip05_ep1_b32_h20_lr7e4_obs84_ent0.1:
        <<: *a2c_DQNCNN_NOGAE_NOStdAdv_graclip05_ep1_b32_h20_lr7e4_obs84
        entropy_weight: 0.1
        learning_rate: 7.0e-4

    a2c_NIPS_NOGAE_NOStdAdv_graclip40.0_eps0.1_alpha0.99_ep1_bNone_h5_lr7e5XNbrActor_obs84_ent0.01:
        <<: *a2c_NIPS_graclip3e0_ep1_b32_h5_lr7e4XNbrActor_obs84
        use_gae: False
        gradient_clip: 40.0
        entropy_weight: 0.01
        standardized_adv: False
        # A bare `None` in YAML is the string "None", not null; it is quoted to
        # make that explicit (the parsed value is unchanged).
        # NOTE(review): confirm the consumer expects the string "None" here rather than null.
        mini_batch_size: 'None'
        horizon: 5
        learning_rate: 7.0e-5
        optimizer_eps: 0.1
        optimizer_alpha: 0.99