MLP: &MLP
  # Architecture preset: MLP "phi" bodies feeding MLP heads, declared for
  # both the actor and the critic. Intended to be merged into an agent
  # preset through the `&MLP` anchor.

  # ------------------------------------------------------------------
  # Actor model
  # ------------------------------------------------------------------
  # Architecture selectors. The quoted 'None' is a literal string.
  actor_phi_arch: 'MLP'
  actor_goal_phi_arch: 'None'
  actor_head_arch: 'MLP'

  # Phi body -- CNN variant (disabled):
  # actor_phi_arch_channels: [16, M, 32, 64]
  # actor_phi_arch_kernels: [2, 2, 2, 2]
  # actor_phi_arch_strides: [1, 1, 1, 1]
  # actor_phi_arch_paddings: [1, 1, 1, 1]

  # Phi body -- MLP variant:
  actor_phi_arch_hidden_units: [400, 300]

  # Actor head:
  actor_head_arch_hidden_units: [300]

  # Goal phi body -- CNN variant (disabled):
  # actor_goal_phi_arch_channels: None
  # actor_goal_phi_arch_kernels: None
  # actor_goal_phi_arch_strides: None
  # actor_goal_phi_arch_paddings: None

  # Goal phi body -- MLP variant.
  # NOTE(review): presumably unused while actor_goal_phi_arch is 'None';
  # confirm against the consumer.
  actor_goal_phi_arch_hidden_units: [128]

  # actor_goal_phi_arch_embedding_size: 32

  # ------------------------------------------------------------------
  # Critic model
  # ------------------------------------------------------------------
  critic_phi_arch: 'MLP'
  critic_action_phi_arch: 'MLP'
  critic_goal_phi_arch: 'None'
  critic_head_arch: 'MLP'

  # Phi body -- CNN variant (disabled):
  # critic_phi_arch_channels: [16, M, 32, 64]
  # critic_phi_arch_kernels: [2, 2, 2, 2]
  # critic_phi_arch_strides: [1, 1, 1, 1]
  # critic_phi_arch_paddings: [1, 1, 1, 1]

  # Phi body -- MLP variant:
  critic_phi_arch_hidden_units: [400, 300]

  # Action phi body -- MLP variant:
  critic_action_phi_arch_hidden_units: [300]

  # Critic head:
  critic_head_arch_hidden_units: [256]

  # Goal phi body -- CNN variant (disabled):
  # critic_goal_phi_arch_channels: None
  # critic_goal_phi_arch_kernels: None
  # critic_goal_phi_arch_strides: None
  # critic_goal_phi_arch_paddings: None

  # Goal phi body -- MLP variant.
  # NOTE(review): presumably unused while critic_goal_phi_arch is 'None';
  # confirm against the consumer.
  critic_goal_phi_arch_hidden_units: [128]

  # critic_goal_phi_arch_embedding_size: 32


BaselineMLP: &BaselineMLP
  # Architecture preset with no phi bodies (selectors set to the string
  # 'None', hidden-unit lists empty) and MLP heads with two hidden layers
  # of 256 units each. Merged into agent presets through the
  # `&BaselineMLP` anchor.

  # ------------------------------------------------------------------
  # Actor model
  # ------------------------------------------------------------------
  actor_phi_arch: 'None'
  actor_goal_phi_arch: 'None'
  actor_head_arch: 'MLP'

  # Phi body (MLP): no hidden layers.
  actor_phi_arch_hidden_units: []

  # Actor head:
  actor_head_arch_hidden_units: [256, 256]

  # ------------------------------------------------------------------
  # Critic model
  # ------------------------------------------------------------------
  critic_phi_arch: 'None'
  critic_action_phi_arch: 'None'
  critic_goal_phi_arch: 'None'
  critic_head_arch: 'MLP'

  # Phi body (MLP): no hidden layers.
  critic_phi_arch_hidden_units: []

  # Action phi body (MLP): no hidden layers.
  critic_action_phi_arch_hidden_units: []

  # Critic head:
  critic_head_arch_hidden_units: [256, 256]
        

extra_hyperparameters: &extra_hyperparameters
  # Settings shared by the agent presets; merged in through the
  # `&extra_hyperparameters` anchor.
  lr_account_for_nbr_actor: false

  # Weight decay is switched off for both networks (1e-6 was noted as an
  # alternative value).
  weights_decay_lambda_actor: 0.0
  weights_decay_lambda_critic: 0.0

  use_target_to_gather_data: false

  # Goal-related switches, all disabled here.
  goal_oriented: false
  goal_state_shared_arch: false
  goal_state_flattening: false  # alternative noted: True

  # Noted values: 40 for HER, 1 for DDPG.
  nbr_training_iteration_per_cycle: 1
  # nbr_episode_per_cycle: 16  # HER: 16 DQN needs removal.

  HER_use_latent: false  # alternative noted: True
  HER_target_clamping: false

TD3_MLP: &TD3_MLP
  # TD3 hyperparameter preset combined with the `MLP` architecture preset
  # and the shared `extra_hyperparameters`.
  #
  # Both mappings are pulled in through a single merge key. Writing `<<`
  # twice in one mapping is a duplicate key: strict loaders reject it and
  # last-wins loaders silently drop the first merge. The two merged
  # mappings share no key names, so the list order does not matter here.
  # Keys written explicitly below take precedence over merged ones.
  <<: [*MLP, *extra_hyperparameters]

  noisy: false
  n_step: 1

  # Prioritized experience replay.
  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Replay buffer.
  # Exponent literals are written with a decimal point and a signed
  # exponent so that YAML 1.1 and YAML 1.2 loaders both yield a float
  # (a bare `1e6` is a plain string under YAML 1.1 resolution).
  replay_capacity: 1.0e+6
  replay_period: 1  # alternative noted: 240

  # Hindsight experience replay.
  use_HER: false
  HER_strategy: 'final-1'  # alternative noted: 'future-4'

  # Action noise settings.
  actor_noise_std: 0.1
  target_actor_noise_std: 0.2
  target_actor_noise_clip: 0.5

  min_capacity: 2.5e+4
  actor_start_delay: 2.5e+4

  actor_update_delay: 2
  ensemble_qnet_nbr_models: 2

  # NOTE(review): a plain `None` is loaded as the string 'None', not as
  # null. Left untouched; confirm which one the consumer expects.
  observation_resize_dim: None
  goal_resize_dim: None

  # Optimisation.
  discount: 0.99
  use_cuda: true
  gradient_clip: 0.0  # alternative noted: 1.0
  batch_size: 256  # alternatives noted: 64, 32
  tau: 5.0e-3  # alternative noted: 0.995
  # Alternative noted for both rates: 1.0e-5
  # ("1e-4 predictor while 1e-5 network...").
  critic_learning_rate: 3.0e-4
  actor_learning_rate: 3.0e-4
  adam_eps: 1.0e-8


TD3_BaselineMLP: &TD3_BaselineMLP
  # TD3 hyperparameter preset combined with the `BaselineMLP`
  # architecture preset and the shared `extra_hyperparameters`.
  #
  # Both mappings are pulled in through a single merge key. Writing `<<`
  # twice in one mapping is a duplicate key: strict loaders reject it and
  # last-wins loaders silently drop the first merge. The two merged
  # mappings share no key names, so the list order does not matter here.
  # Keys written explicitly below take precedence over merged ones.
  <<: [*BaselineMLP, *extra_hyperparameters]

  noisy: false
  n_step: 1

  # Prioritized experience replay.
  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Replay buffer.
  # Exponent literals are written with a decimal point and a signed
  # exponent so that YAML 1.1 and YAML 1.2 loaders both yield a float
  # (a bare `1e6` is a plain string under YAML 1.1 resolution).
  replay_capacity: 1.0e+6
  replay_period: 1  # alternative noted: 240

  # Hindsight experience replay.
  use_HER: false
  HER_strategy: 'final-1'  # alternative noted: 'future-4'

  # Action noise settings.
  actor_noise_std: 0.1
  target_actor_noise_std: 0.2
  target_actor_noise_clip: 0.5

  min_capacity: 2.5e+4
  actor_start_delay: 2.5e+4

  actor_update_delay: 2
  ensemble_qnet_nbr_models: 2

  # NOTE(review): a plain `None` is loaded as the string 'None', not as
  # null. Left untouched; confirm which one the consumer expects.
  observation_resize_dim: None
  goal_resize_dim: None

  # Optimisation.
  discount: 0.99
  use_cuda: true
  gradient_clip: 0.0  # alternative noted: 1.0
  batch_size: 256  # alternatives noted: 64, 32
  tau: 5.0e-3  # alternative noted: 0.995
  # Alternative noted for both rates: 1.0e-5
  # ("1e-4 predictor while 1e-5 network...").
  critic_learning_rate: 3.0e-4
  actor_learning_rate: 3.0e-4
  adam_eps: 1.0e-8
        
experiment:
  # Experiment definition: the list of tasks to run plus benchmarking,
  # recording, budget and seed settings.
  #
  # Exponent literals are written with a decimal point and a signed
  # exponent so that YAML 1.1 and YAML 1.2 loaders both yield a float
  # (forms such as `1e2` or `5.0e3` are plain strings under YAML 1.1).
  tasks:
    - env-id: 'Ant-v2'

      run-id: 'B1M/NoLayerInit/NotEvenInEnsembleQNet/ExplorationNoiseWithActionScaler/TargetDelayedUpdateLikeactor/ActorStart25p3/MinCapacity25e3/ActorNoiseStd1m1Target2m1Clip5m1/SmallMLP/ReplayPeriod1_UpdatePerReplay1_Cap1p6_b256/CriticLr3m4_ActorLr3m4_Seed100_tau5m3_GradClip0/WeightDecayCritic0Actor0/MSELOSS/'

      # The value matches a key of the top-level `agents` mapping.
      # agent-id: '1step_TD3_MLP256'
      agent-id: '1step_TD3_BaselineMLP'

      nbr_actor: 1
      # nbr_frame_skipping: 0
      # nbr_frame_stacking: 4
      # single_life_episode: False
      # nbr_max_random_steps: 0
      # clip_reward: False

      # NOTE(review): a plain `None` is loaded as the string 'None', not
      # as null. Left untouched; confirm which one the consumer expects.
      observation_resize_dim: None
      goal_resize_dim: None

  # NOTE(review): absolute, user-specific path; adjust per machine.
  experiment_id: '/home/kevin/debug_TD3/'
  benchmarking_episodes: 10
  benchmarking_interval: 5.0e+3

  # Deprecated:
  benchmarking_record_episode_interval: 1.0e+8

  video_recording_episode_period_training: 1.0e+2
  video_recording_episode_period_benchmarking: 5.0e+0
  train_observation_budget: 1.0e+6
  seed: 100

agents:
  # Named agent configurations. Each entry merges one TD3 preset and then
  # sets a few keys explicitly; explicit keys take precedence over the
  # merged ones (here PER_alpha and PER_beta differ from the presets).

  # Disabled THER example. Its anchor `THER_LargeCNN_Predictor` is not
  # defined in the visible part of this file.
  # 1step_prioritized_double_THER_LargeCNN_beta4m1_alpha_7m1_HER40TrainPerCycle_16EpPerCycle_NoPrioritizedPredTraining:
  #   <<: *THER_LargeCNN_Predictor
  #   double: True
  #   # dueling: True
  #   # noisy: True
  #   n_step: 1
  #   use_PER: True
  #   PER_alpha: 0.7
  #   PER_beta: 0.4
  #   # replay_period: 2
  #   # batch_size: 16
  #   THER_use_PER: False
  #   THER_PER_alpha: 0.7
  #   THER_PER_beta: 0.4
  #   nbr_training_iteration_per_cycle: 40  # HER: 40
  #   nbr_episode_per_cycle: 16  # HER: 16

  # Author's note on the disabled replay_period / batch_size overrides
  # (applies to every entry in this mapping):
  #   Paper: ratio = batch_size(=32) / replay_period(=4) = 8, but the
  #   bottleneck on GPU batch size gives a better trade-off
  #   batch-regularization-effect / speed with a batch_size=16 using an
  #   NVIDIA 1080 Ti... Expect ~90 it/sec without update and ~84 it/sec
  #   with updates, whereas 32 / 4 yielded ~25 it/sec.

  1step_TD3_MLP256:
    <<: *TD3_MLP
    # noisy: True
    n_step: 1
    use_PER: false
    PER_alpha: 0.7
    PER_beta: 0.4
    # replay_period: 2
    # batch_size: 16
    # nbr_training_iteration_per_cycle: 40  # HER: 40
    # nbr_episode_per_cycle: 16  # HER: 16

  1step_TD3_BaselineMLP:
    <<: *TD3_BaselineMLP
    # noisy: True
    n_step: 1
    use_PER: false
    PER_alpha: 0.7
    PER_beta: 0.4
    # replay_period: 2
    # batch_size: 16
    # nbr_training_iteration_per_cycle: 40  # HER: 40
    # nbr_episode_per_cycle: 16  # HER: 16
