# Optional-feature hyperparameters (advantage standardisation, BetaVAE,
# Random Network Distillation). Exposed through the `extra_hyperparameters`
# anchor so that other mappings in this file can merge them in.
#
# Exponent floats are written with a fractional part and a signed exponent
# (e.g. 1.0e+2) so that YAML 1.1 resolvers such as PyYAML load them as floats
# too, instead of falling back to strings.
extra_hyperparameters: &extra_hyperparameters
    standardized_adv: true
    lr_account_for_nbr_actor: false

    # BetaVAE:
    use_vae: false
    vae_weight: 1.0e+0
    vae_nbr_latent_dim: 128
    # A '#' only opens a comment when it is preceded by whitespace: the former
    # `3#4` was a single plain scalar, i.e. the string "3#4", not the integer 3.
    vae_decoder_nbr_layer: 3  # 4
    vae_decoder_conv_dim: 128

    cnn_encoder_feature_dim: 128  # vae_nbr_latent_dim

    vae_beta: 1.0e+2
    vae_max_capacity: 1.0e+2
    vae_nbr_epoch_till_max_capacity: 20
    vae_constrainedEncoding: false
    vae_tc_discriminator_hidden_units: [256, 256, 256, 256, 2]  # tuple([2*cnn_encoder_feature_dim']]*4+[2])

    # Random Network Distillation:
    use_random_network_distillation: false
    intrinsic_discount: 0.99
    rnd_loss_int_ratio: 0.5
    rnd_obs_clip: 5
    rnd_non_episodic_int_r: true
    rnd_update_period_running_meanstd_int_reward: 1.0e+5
    rnd_update_period_running_meanstd_obs: 1.0e+5  # rnd_update_period_running_meanstd_int_reward
    # RND Convolutional Architecture:
    rnd_arch: 'CNN'
    rnd_arch_channels: [32, 64, 64]
    rnd_arch_kernels: [8, 4, 3]
    rnd_arch_strides: [4, 2, 1]
    rnd_arch_paddings: [0, 1, 1]
    rnd_arch_feature_dim: 512
    # RND Fully-Connected Architecture:
    # rnd_feature_net_fc_arch_hidden_units: (128, 64)


# Network layout published as the `LargeCNN` anchor: a 'CNN' phi body, with
# the actor and critic architectures both set to the string 'None'.
LargeCNN: &LargeCNN
    # Architecture selectors:
    phi_arch: 'CNN'
    actor_arch: 'None'
    critic_arch: 'None'

    # Phi body: one entry per convolutional layer in each list.
    phi_arch_channels: [32, 64, 64]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512
    phi_arch_hidden_units: [512]

    # Actor and critic: no hidden units configured.
    actor_arch_hidden_units: []
    critic_arch_hidden_units: []


# Same layout as the CNN variant except for the phi body type, which is
# 'CNN-GRU-RNN'. Published as the `LargeGRUCNN` anchor; no alias to it appears
# in the visible part of this file.
LargeGRUCNN: &LargeGRUCNN
    # Architecture selectors:
    phi_arch: 'CNN-GRU-RNN'
    actor_arch: 'None'
    critic_arch: 'None'

    # Phi body: one entry per convolutional layer in each list.
    phi_arch_channels: [32, 64, 64]
    phi_arch_kernels: [8, 4, 3]
    phi_arch_strides: [4, 2, 1]
    phi_arch_paddings: [1, 1, 1]
    phi_arch_feature_dim: 512
    phi_arch_hidden_units: [512]

    # Actor and critic: no hidden units configured.
    actor_arch_hidden_units: []
    critic_arch_hidden_units: []

        
# Base PPO configuration, published as an anchor of the same name so that the
# entries under `agents` can merge it and override individual values.
ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5: &ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5
        observation_resize_dim: 84
        discount: 0.99
        use_gae: true
        use_cuda: true
        gae_tau: 0.95
        value_weight: 1.0
        entropy_weight: 0.01
        gradient_clip: 0.5
        optimization_epochs: 3  # From baseline
        mini_batch_size: 32  # From baseline
        ppo_ratio_clip: 0.1  # From baseline!!!!
        horizon: 1024  # From baseline
        learning_rate: 2.5e-4  # From baseline
        adam_eps: 1.0e-8

        # Pull in the network layout and the optional-feature settings through
        # ONE merge key. Writing `<<` twice makes it a duplicate mapping key,
        # which strict loaders reject or resolve by keeping only the last one.
        # The two merged mappings share no keys, so their order is irrelevant.
        <<: [*LargeCNN, *extra_hyperparameters]


# Experiment definition: the list of tasks to run, followed by global
# benchmarking/training settings.
#
# Exponent floats use a signed exponent (1.0e+4) so that YAML 1.1 resolvers
# such as PyYAML load them as floats rather than strings.
experiment:
    tasks:
        - env-id: 'MineRLTreechop-v0'

          # Run identifier. Earlier/alternative identifiers kept for reference:
          # run-id: 'Seed13_venv_ppo_1actors_Sk4_St4_Obs84_NoGrayscale_NoScaling_PPOratio1m1_StdAdv_RLPYT_Ep10_b512_h2048'
          # run-id: 'Seed13_venv_ppo_1actors_Sk4_St4_Obs84_NoGrayscale_NoScaling_PPOratio1m1_StdAdv_SingleRewardEpisode+P'
          # run-id: 'Seed13_venv_ppo_1actors_Sk4_St4_Obs64_NoGrayscale_NoScaling_PPOratio1m1_NoStdAdv'
          # run-id: 'Seed13_venv_ppo_1actors_Sk4_St4_Obs84_NoGrayscale_NoScaling_PPOratio1m1_StdAdv_ProgressiveRewardEpisode1e4+P'
          run-id: 'Seed10_venv_ppo_1actors_Sk4_St4_Obs64_NoGrayscale_NoScaling_PPOratio1m1_StdAdv_Ep10_b64+StepDecayHooks'

          # Agent to use; the value names one of the keys under `agents`.
          # Alternatives:
          # agent-id: 'ppo_LargeCNN'
          # agent-id: 'ppo_LargeCNN_V5m1'
          # agent-id: 'ppo_LargeCNN_h128_b32'
          agent-id: 'ppo_LargeCNN_V5m1_ratio2m1'

          nbr_actor: 1
          nbr_frame_skipping: 4
          nbr_frame_stacking: 4
          grayscale: false
          scaling: false
          observation_wrapper: 'ObtainPOV'
          action_wrapper: 'SerialDiscrete'
          observation_resize_dim: 64  # 84
          # Alternatives: 'penalizing_progressive1e4', 'penalizing_single_reward_episode'
          reward_scheme: 'None'
    experiment_id: 'MineRL_training'
    benchmarking_episodes: 0
    benchmarking_interval: 1.0e+4
    benchmarking_record_episode_interval: 0
    train_observation_budget: 8.0e+6
    seed: 10

# Agent variants. Each one merges the base PPO configuration and then lists
# only the values it changes; keys written explicitly in a mapping take
# precedence over keys brought in by the merge.
agents:
    # Base configuration, unchanged.
    ppo_LargeCNN:
        <<: *ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5
        # Disabled overrides:
        # standardized_adv: False  # From Baseline!!
        # optimization_epochs: 10  # From RLPyt
        # mini_batch_size: 512  # From RLPyt with h4096:b1024  h2048:512 #256
        # horizon: 2048  # From RLPyt or Baseline=32 with h1024

    # Value weight 0.5, 10 optimization epochs, mini-batches of 64.
    ppo_LargeCNN_V5m1:
        <<: *ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5
        value_weight: 0.5
        optimization_epochs: 10  # From RLPyt
        mini_batch_size: 64  # 256 -- From RLPyt with h4096:b1024  h2048:512 #256
        # Disabled overrides:
        # standardized_adv: False  # From Baseline!!
        # horizon: 2048  # From RLPyt or Baseline=32 with h1024

    # Same overrides as ppo_LargeCNN_V5m1, plus a PPO ratio clip of 0.2.
    ppo_LargeCNN_V5m1_ratio2m1:
        <<: *ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5
        value_weight: 0.5
        ppo_ratio_clip: 0.2
        optimization_epochs: 10  # From RLPyt
        mini_batch_size: 64  # 256 -- From RLPyt with h4096:b1024  h2048:512 #256
        # Disabled overrides:
        # standardized_adv: False  # From Baseline!!
        # horizon: 2048  # From RLPyt or Baseline=32 with h1024

    # Horizon of 128 with mini-batches of 32.
    ppo_LargeCNN_h128_b32:
        <<: *ppo_LargeCNN_obs84_GAE95_V1_E1m2_graclip5m1_ep3_b32_ratio1m1_h1024_lr25m5
        horizon: 128
        mini_batch_size: 32
        # Disabled override:
        # optimization_epochs: 10  # From RLPyt
