# Network-architecture settings for the actor and the critic.
# Exposed through the `&MLP` anchor so other sections can merge it in.
MLP: &MLP
    # ===================================================================
    # Actor model
    # ===================================================================
    # Architecture selectors. The goal body is set to the *string* 'None'
    # (presumably meaning "no goal body" -- confirm against the consumer).
    actor_phi_arch: 'MLP'
    actor_goal_phi_arch: 'None'
    actor_head_arch: 'MLP'

    # --- Actor phi body ---
    # CNN variant (disabled):
    #actor_phi_arch_channels: [16, M, 32, 64]
    #actor_phi_arch_kernels: [2, 2, 2, 2]
    #actor_phi_arch_strides: [1, 1, 1, 1]
    #actor_phi_arch_paddings: [1, 1, 1, 1]

    # MLP variant:
    actor_phi_arch_hidden_units: [400, 300]

    # --- Actor head ---
    actor_head_arch_hidden_units: [300]

    # --- Actor goal phi body ---
    # CNN variant (disabled):
    #actor_goal_phi_arch_channels: None
    #actor_goal_phi_arch_kernels: None
    #actor_goal_phi_arch_strides: None
    #actor_goal_phi_arch_paddings: None
    # MLP variant:
    actor_goal_phi_arch_hidden_units: [128]

    #actor_goal_phi_arch_embedding_size: 32

    # ===================================================================
    # Critic model
    # ===================================================================
    # Architecture selectors; unlike the actor, the critic also has a
    # dedicated body for the action input.
    critic_phi_arch: 'MLP'
    critic_action_phi_arch: 'MLP'
    critic_goal_phi_arch: 'None'
    critic_head_arch: 'MLP'

    # --- Critic phi body ---
    # CNN variant (disabled):
    #critic_phi_arch_channels: [16, M, 32, 64]
    #critic_phi_arch_kernels: [2, 2, 2, 2]
    #critic_phi_arch_strides: [1, 1, 1, 1]
    #critic_phi_arch_paddings: [1, 1, 1, 1]

    # MLP variant:
    critic_phi_arch_hidden_units: [400, 300]

    # --- Critic action phi body ---
    # MLP variant:
    critic_action_phi_arch_hidden_units: [300]

    # --- Critic head ---
    critic_head_arch_hidden_units: [256]

    # --- Critic goal phi body ---
    # CNN variant (disabled):
    #critic_goal_phi_arch_channels: None
    #critic_goal_phi_arch_kernels: None
    #critic_goal_phi_arch_strides: None
    #critic_goal_phi_arch_paddings: None
    # MLP variant:
    critic_goal_phi_arch_hidden_units: [128]

    #critic_goal_phi_arch_embedding_size: 32

# Additional training switches, exposed through the
# `&extra_hyperparameters` anchor so other sections can merge them in.
extra_hyperparameters: &extra_hyperparameters
    lr_account_for_nbr_actor: false
    weights_decay_lambda: 0.01  # 1e-6
    use_target_to_gather_data: false

    # Goal-conditioned options (all switched off here).
    goal_oriented: false
    goal_state_shared_arch: false
    goal_state_flattening: false  #True

    # Author's reference values: HER uses 40, DDPG uses 1.
    nbr_training_iteration_per_cycle: 1  # HER: 40 # DDPG 1
    #nbr_episode_per_cycle:  16  # HER: 16 DQN needs removal.

    # HER-specific options.
    HER_use_latent: false  #True
    HER_target_clamping: false

# Base DDPG agent configuration, exposed through the `&DDPG_MLP` anchor.
# It combines the keys declared here with everything from `*MLP` and
# `*extra_hyperparameters` (see the merge key at the end of the block).
#
# Exponent-notation numbers are written with a decimal point AND a signed
# exponent (e.g. 5.0e+6) so that YAML 1.1 loaders such as PyYAML resolve
# them as floats instead of strings; YAML 1.2 loaders read them as floats
# either way.
DDPG_MLP: &DDPG_MLP
    noisy: false
    n_step: 1

    # Prioritized experience replay (disabled).
    use_PER: false
    PER_alpha: 0.6
    PER_beta: 1.0

    # Replay buffer sizing.
    replay_capacity: 5.0e+6
    # NOTE(review): the run-id in the `experiment` section is labelled
    # 'MinCap5p3' while this value is 5e2 -- confirm which one is intended.
    min_capacity: 5.0e+2  #1e4
    replay_period: 1  #240

    # Hindsight experience replay (disabled).
    use_HER: false
    HER_strategy: 'final-1'  #'future-4' #

    # Plain `None` is the *string* "None" in YAML, not null; it is quoted
    # here to make that explicit (the parsed value is unchanged).
    observation_resize_dim: 'None'
    goal_resize_dim: 'None'

    # Optimisation settings.
    discount: 0.99
    use_cuda: true
    gradient_clip: 1.0
    batch_size: 64  #32
    tau: 1.0e-3  #0.995
    critic_learning_rate: 1.0e-3  #1.0e-5   # 1e-4 predictor while 1e-5 network...
    actor_learning_rate: 1.0e-4  #1.0e-5   # 1e-4 predictor while 1e-5 network...
    adam_eps: 1.0e-8

    # A single merge key carrying both anchors: repeating `<<` is a
    # duplicate mapping key, which strict loaders reject. The two merged
    # mappings share no key names, so merge precedence is irrelevant here.
    <<: [*MLP, *extra_hyperparameters]
        
# Experiment definition: the list of tasks to run plus run-wide settings.
#
# Exponent-notation numbers are written with a decimal point AND a signed
# exponent (e.g. 1.0e+6) so that YAML 1.1 loaders such as PyYAML resolve
# them as floats instead of strings; YAML 1.2 loaders read them as floats
# either way.
experiment:
    # One entry per task. `agent-id` must match a key of the `agents`
    # section of this file.
    tasks:
        - env-id: 'Hopper-v2'

          #'run-id': 'B1M/final-1/EpPerCycle16/lrPr1m4Net1m4/Seed100_venv_Max+Sk0_St4_ObsNone_ClipReward_Eps5p5End5m2_tau100_GradClip1_THER1p1/EnvReward0p1_PredicateEps1e0_NoTargetClamping/SentTrueL7_40MaxTrainPerUpdate_AccGoal70_THERPredTrainPeriodOnBufferedPeriodAndOnSuccess_MaxEntr1m1',
          run-id: 'B1M/SmallMLP/ActualExploration/ReplayPeriod1_UpdatePerReplay1_MinCap5p3_Cap5p6_b64/CriticLr1m3_ActorLr1m4_Seed100_venv_ObsNone_tau1m3_GradClip1p0/WeightDecay1m2'

          #'agent-id': '1step_prioritized_double_THER_LargeCNN_beta4m1_alpha_7m1_HER40TrainPerCycle_16EpPerCycle_NoPrioritizedPredTraining_PredPiSharedPhi',
          #'agent-id': '1step_DDPG_MLP256',
          #'agent-id': '1step_DDPG_ActorGRURNNCriticMLP256',
          #'agent-id': '1step_DDPG_ActorMLPCriticGRURNN256',
          agent-id: '1step_DDPG_MLP256'

          nbr_actor: 1
          #'nbr_frame_skipping': 0,
          #'nbr_frame_stacking': 4,
          #'single_life_episode': False,
          #'nbr_max_random_steps': 0,
          #'clip_reward': False,
          # Plain `None` is the *string* "None" in YAML, not null; it is
          # quoted here to make that explicit (parsed value unchanged).
          observation_resize_dim: 'None'
          goal_resize_dim: 'None'

    #experiment_id: 'DDPG_Benchmark'
    # NOTE(review): absolute, machine-specific path -- confirm this is the
    # intended identifier before running anywhere else.
    experiment_id: '/home/kevin/debug_TD3/'
    benchmarking_episodes: 1
    benchmarking_interval: 1.0e+10
    benchmarking_record_episode_interval: 1.0e+8
    video_recording_episode_period: 1.0e+2
    train_observation_budget: 1.0e+6
    seed: 100

# Agent definitions, keyed by agent id. Every active entry starts from
# the `*DDPG_MLP` base and overrides a few keys; keys written explicitly
# in an entry take precedence over the merged-in ones.
#
# Author's tuning note (applies to the commented-out `replay_period` /
# `batch_size` overrides found in each entry below):
#   Paper: ratio = batch_size(=32) / replay_period(=4) = 8,
#   but the GPU batch-size bottleneck gives a better trade-off between
#   batch-regularization effect and speed with batch_size=16 on an
#   NVIDIA 1080 Ti: expect ~90 it/sec without updates and ~84 it/sec
#   with updates, whereas 32 / 4 yielded ~25 it/sec.
agents:
    # --- Disabled THER agent, kept for reference ---
    # 1step_prioritized_double_THER_LargeCNN_beta4m1_alpha_7m1_HER40TrainPerCycle_16EpPerCycle_NoPrioritizedPredTraining:
    #     <<: *THER_LargeCNN_Predictor
    #     double: True
    #     #dueling: True
    #     #noisy: True
    #     n_step: 1
    #     use_PER: True
    #     PER_alpha: 0.7
    #     PER_beta: 0.4
    #     #replay_period: 2
    #     #batch_size: 16
    #     # Paper: ratio = batch_size(=32) / replay_period(=4) = 8 ,
    #     # but bottleneck on GPU batchsize gives a better trade-off
    #     # batch-regularization-effect / speed with a batch_size=16
    #     # using NVIDIA 1080 Ti... Expect ~90 it/sec, without update
    #     # and ~84 it/sec with updates...
    #     # Whereas 32 / 4 yielded ~25 it/sec....
    #     THER_use_PER: False
    #     THER_PER_alpha: 0.7
    #     THER_PER_beta: 0.4
    #     nbr_training_iteration_per_cycle: 40 # HER: 40
    #     nbr_episode_per_cycle:  16  # HER: 16

    # Base DDPG agent; only the PER coefficients differ from the base.
    1step_DDPG_MLP256:
        <<: *DDPG_MLP
        #noisy: True
        n_step: 1
        use_PER: false
        PER_alpha: 0.7
        PER_beta: 0.4
        # Disabled overrides (see the tuning note at the top of `agents`):
        #replay_period: 2
        #batch_size: 16
        #nbr_training_iteration_per_cycle: 40 # HER: 40
        #nbr_episode_per_cycle:  16  # HER: 16

    # Same as above, with the actor head switched to 'GRU-RNN'.
    1step_DDPG_ActorGRURNNCriticMLP256:
        <<: *DDPG_MLP
        actor_head_arch: 'GRU-RNN'
        #noisy: True
        n_step: 1
        use_PER: false
        PER_alpha: 0.7
        PER_beta: 0.4
        # Disabled overrides (see the tuning note at the top of `agents`):
        #replay_period: 2
        #batch_size: 16
        #nbr_training_iteration_per_cycle: 40 # HER: 40
        #nbr_episode_per_cycle:  16  # HER: 16

    # Same as the base agent, with the critic head switched to 'GRU-RNN'.
    1step_DDPG_ActorMLPCriticGRURNN256:
        <<: *DDPG_MLP
        critic_head_arch: 'GRU-RNN'
        #noisy: True
        n_step: 1
        use_PER: false
        PER_alpha: 0.7
        PER_beta: 0.4
        # Disabled overrides (see the tuning note at the top of `agents`):
        #replay_period: 2
        #batch_size: 16
        #nbr_training_iteration_per_cycle: 40 # HER: 40
        #nbr_episode_per_cycle:  16  # HER: 16
   
   