# Hyperparameters common to the agent presets of this file. The presets import
# this mapping through a merge key and override individual entries as needed.
extra_hyperparameters: &extra_hyperparameters
  lr_account_for_nbr_actor: false
  # R2D2 alternative: weights_decay_lambda: 0.0
  weights_decay_lambda: 1.0
  weights_entropy_lambda: 0.0  # original trailing note: "01"

  use_target_to_gather_data: false
  goal_oriented: true
  goal_state_shared_arch: false
  goal_state_flattening: true
  nbr_training_iteration_per_cycle: 40
  nbr_episode_per_cycle: 16

  # HER:
  HER_target_clamping: true

  ####################################
  # New hyperparameters:
  PER_compute_initial_priority: false

  sequence_replay_use_online_states: true
  sequence_replay_use_zero_initial_states: false
  sequence_replay_store_on_terminal: true

  r2d2_loss_masking: true
  r2d2_loss_masking_n_step_regularisation: true
  r2d2_bellman_target_SAD: false

  burn_in: true
  sequence_replay_unroll_length: 80
  sequence_replay_overlap_length: 40
  sequence_replay_burn_in_length: 20

  sequence_replay_PER_eta: 0.9

  # vdn: false
  # vdn_nbr_players: 2

  #####################################
    
# Architecture preset with a convolutional phi body ('CNN'). Arch names set to
# the string 'None' are kept as strings, exactly as in the original.
LargeCNN: &LargeCNN
  # sad: true

  phi_arch: 'CNN'
  # R2D2:
  actor_arch: 'None'
  critic_arch: 'None'
  # R2D2 alternative:
  # critic_arch: 'LSTM-RNN'

  goal_phi_arch: 'None'

  # Phi Body.
  # Earlier settings, kept for reference:
  #   phi_arch_channels: [32, 64, 64]
  #   phi_arch_kernels: [8, 4, 3]
  #   phi_arch_strides: [4, 2, 1]
  #   phi_arch_paddings: [1, 1, 1]
  #   phi_arch_feature_dim: 512
  #   phi_arch_hidden_units: [512,]
  #   phi_arch_channels: ['BN32', 'BN64', 'BN64']
  phi_arch_channels: [32, 64, 64]
  # phi_arch_kernels: [8, 4, 3]
  phi_arch_kernels: [3, 3, 3]
  # phi_arch_strides: [4, 2, 1]
  phi_arch_strides: [2, 2, 1]
  phi_arch_paddings: [1, 1, 1]
  phi_arch_feature_dim: 512
  phi_arch_hidden_units: []

  # Actor architecture:
  actor_arch_hidden_units: []
  # Critic architecture:
  critic_arch_hidden_units: []
  # R2D2 alternative:
  #   # critic_arch_feature_dim: 32
  #   critic_arch_hidden_units: [512, 512]

  # Goal Phi Body:
  goal_phi_arch_channels: [32, 64, 64]
  goal_phi_arch_kernels: [8, 4, 3]
  goal_phi_arch_strides: [4, 2, 1]
  goal_phi_arch_paddings: [1, 1, 1]
  goal_phi_arch_feature_dim: 512
  goal_phi_arch_hidden_units: [512]

  # Goal critic architecture:
  goal_critic_arch_hidden_units: []

  # Dictionary of keys living inside the 'infos' output of OpenAI Gym.
  # Each entry gives the expected `shape` of the extra input and the
  # `target_location` where the input should be stored in the framestate.
  # Parsing of the shape will infer where to fetch the value when
  # encountering a string.
  extra_inputs_infos:
    previous_reward:
      shape: [1]
      target_location: ['critic_body', 'extra_inputs']
    previous_action:
      shape: ['task.action_dim']
      target_location: ['critic_body', 'extra_inputs']
    desired_goal:
      shape: ['task.observation_shape']
      target_location: ['phi_body', 'extra_inputs']
    ########################
    # WITH SAD:
    ########################
    # greedy_action:
    #   shape: ['task.action_dim']
    #   target_location: ['critic_body', 'extra_inputs']
    ########################
    ########################

        
# Architecture preset with an MLP phi body. Arch names set to the string
# 'None' are kept as strings, exactly as in the original.
smallMLP: &smallMLP
  # sad: true
  # R2D2:
  actor_arch: 'None'
  # R2D2 alternatives:
  #   critic_arch: 'LSTM-RNN'
  #   # critic_arch_feature_dim: 32
  #   critic_arch_hidden_units: [512, 512]

  phi_arch: 'MLP'
  critic_arch: 'None'

  goal_phi_arch: 'None'

  # Phi Body:
  phi_arch_feature_dim: 256
  phi_arch_hidden_units: [256]

  # Actor architecture:
  actor_arch_hidden_units: []
  # Critic architecture:
  critic_arch_hidden_units: []

  # Goal Phi Body:
  goal_phi_arch_feature_dim: 256
  goal_phi_arch_hidden_units: [256]

  # Goal critic architecture:
  goal_critic_arch_hidden_units: []

  # Dictionary of keys living inside the 'infos' output of OpenAI Gym.
  # Each entry gives the expected `shape` of the extra input and the
  # `target_location` where the input should be stored in the framestate.
  # Parsing of the shape will infer where to fetch the value when
  # encountering a string.
  extra_inputs_infos:
    # previous_reward:
    #   shape: [1]
    #   target_location: ['critic_body', 'extra_inputs']
    # previous_action:
    #   shape: ['task.action_dim']
    #   target_location: ['critic_body', 'extra_inputs']
    desired_goal:
      shape: ['task.observation_shape']
      target_location: ['phi_body', 'extra_inputs']
    ########################
    # WITH SAD:
    ########################
    # greedy_action:
    #   shape: ['task.action_dim']
    #   target_location: ['critic_body', 'extra_inputs']
    ########################
    ########################

        
# DQN preset built on the LargeCNN architecture plus the shared
# extra_hyperparameters. Keys written here override the merged-in ones.
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
  # One merge key carrying both sources: writing `<<` twice in the same
  # mapping is a duplicate key, which strict loaders reject. The two merged
  # mappings share no key, so their order in the list is immaterial.
  <<: [*LargeCNN, *extra_hyperparameters]

  double: false
  dueling: false
  noisy: false
  n_step: 1

  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats rather than strings.
  replay_capacity: 1.0e+6
  min_capacity: 1.0e+4
  replay_period: 1

  use_HER: false
  HER_strategy: 'future-4'

  # The string 'None' (not YAML null), as the unquoted original resolved to.
  observation_resize_dim: 'None'
  goal_resize_dim: 'None'

  discount: 0.99
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 32
  tau: 1.0e-2
  learning_rate: 2.5e-4
  adam_eps: 1.0e-8

  epsstart: 1.0
  epsend: 0.01  # alternative: 0.1
  epsdecay: 3000  # alternative: 1000000


# DQN preset built on the smallMLP architecture plus the shared
# extra_hyperparameters. Keys written here override the merged-in ones.
# NOTE: the anchor name still says b32/tau1m2/lr25m5 although the values below
# are batch_size 128, tau 2.5e-2 and learning_rate 1.0e-3; the name is left
# untouched because agents reference this anchor.
dqn_smallMLP_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_smallMLP_obs84_graclip5m1_b32_tau1m2_lr25m5
  # One merge key carrying both sources: writing `<<` twice in the same
  # mapping is a duplicate key, which strict loaders reject. The two merged
  # mappings share no key, so their order in the list is immaterial.
  <<: [*smallMLP, *extra_hyperparameters]

  double: false
  dueling: false
  noisy: false
  n_step: 1

  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float rather than a string.
  replay_capacity: 1.0e+6
  min_capacity: 128  # alternative: 1e4
  replay_period: 240

  use_HER: false
  HER_strategy: 'final-1'
  # HER_strategy: 'future-4'

  # The string 'None' (not YAML null), as the unquoted original resolved to.
  observation_resize_dim: 'None'
  goal_resize_dim: 'None'

  discount: 0.98  # alternative: 0.99
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 128  # alternative: 32
  tau: 2.5e-2  # alternative: 1.0e-2
  learning_rate: 1.0e-3  # alternative: 2.5e-4
  adam_eps: 1.0e-8

  epsstart: 1.0
  epsend: 0.02  # alternative: 0.1
  epsdecay: 500  # alternative: 1000000
  epsdecay_strategy: 'None'

# R2D2 preset built on the LargeCNN architecture plus the shared
# extra_hyperparameters. Keys written here override the merged-in ones; this
# includes the sequence-replay / burn-in settings, which differ from the
# extra_hyperparameters defaults (e.g. burn_in, unroll/overlap/burn-in length).
r2d2_LargeCNN_graclip5m1_b128_tau4m4_lr6p25m5_L40_O10_B0: &r2d2_LargeCNN_graclip5m1_b128_tau4m4_lr6p25m5_L40_O10_B0
  # One merge key carrying both sources: writing `<<` twice in the same
  # mapping is a duplicate key, which strict loaders reject. The two merged
  # mappings share no key, so their order in the list is immaterial.
  <<: [*LargeCNN, *extra_hyperparameters]

  # observation_resize_dim: 21  # alternative: 56

  dueling: true
  noisy: false
  n_step: 3

  use_PER: true
  PER_alpha: 0.9
  PER_beta: 0.6

  use_HER: false
  HER_strategy: 'future-4'

  replay_capacity: 5242880  # in terms of experiences; alternative: 1e6
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float rather than a string.
  min_capacity: 4.0e+5  # in terms of experiences; alternative: 1e4
  replay_period: 1

  actor_models_update_steps_interval: 10  # considering only 1 actor's steps.

  discount: 0.999
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 128
  tau: 4.0e-4
  learning_rate: 6.25e-5
  adam_eps: 1.5e-5

  epsstart: 1.0
  epsend: 0.1
  epsdecay: 10000
  eps_greedy_alpha: 7.0

  sequence_replay_use_online_states: true
  sequence_replay_use_zero_initial_states: false
  sequence_replay_store_on_terminal: false

  r2d2_loss_masking: true
  r2d2_loss_masking_n_step_regularisation: true

  burn_in: false
  sequence_replay_unroll_length: 40
  sequence_replay_overlap_length: 10
  sequence_replay_burn_in_length: 0

  sequence_replay_PER_eta: 0.9

# R2D2 preset built on the smallMLP architecture plus the shared
# extra_hyperparameters. Keys written here override the merged-in ones; this
# includes the sequence-replay / burn-in settings, which differ from the
# extra_hyperparameters defaults (e.g. burn_in, unroll/overlap/burn-in length).
r2d2_smallMLP_graclip5m1_b128_tau4m4_lr6p25m5_L40_O10_B0: &r2d2_smallMLP_graclip5m1_b128_tau4m4_lr6p25m5_L40_O10_B0
  # One merge key carrying both sources: writing `<<` twice in the same
  # mapping is a duplicate key, which strict loaders reject. The two merged
  # mappings share no key, so their order in the list is immaterial.
  <<: [*smallMLP, *extra_hyperparameters]

  # observation_resize_dim: 21  # alternative: 56

  dueling: true
  noisy: false
  n_step: 3

  use_PER: true
  PER_alpha: 0.9
  PER_beta: 0.6

  use_HER: false
  HER_strategy: 'future-4'

  replay_capacity: 5242880  # in terms of experiences; alternative: 1e6
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float rather than a string.
  min_capacity: 4.0e+5  # in terms of experiences; alternative: 1e4
  replay_period: 1

  actor_models_update_steps_interval: 10  # considering only 1 actor's steps.

  discount: 0.999
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 128
  tau: 4.0e-4
  learning_rate: 6.25e-5
  adam_eps: 1.5e-5

  epsstart: 1.0
  epsend: 0.1
  epsdecay: 10000
  eps_greedy_alpha: 7.0

  sequence_replay_use_online_states: true
  sequence_replay_use_zero_initial_states: false
  sequence_replay_store_on_terminal: false

  r2d2_loss_masking: true
  r2d2_loss_masking_n_step_regularisation: true

  burn_in: false
  sequence_replay_unroll_length: 40
  sequence_replay_overlap_length: 10
  sequence_replay_burn_in_length: 0

  sequence_replay_PER_eta: 0.9

# Experiment description: the list of tasks to run, followed by the
# benchmarking settings, the training budget and the seed. Commented-out
# entries are alternatives kept from earlier runs.
experiment:
  tasks:
    - env-id: '10BitsSwap-v0'
      # env-id: '20BitsSwap-v0'
      # env-id: '15BitsSwap-v0'

      # run-id: 'B7/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1_lr1m3/Seed10_venv_dqn_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40_GradClip5m1'
      # run-id: 'WithHERTargetClamping/WithProperEpisodeStoring/B7/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1_lr1m3/Seed10_venv1_r2d2_her_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40_GradClip5m1'
      # LR 1e-3 : too aggressive...
      # run-id: 'WithHERTargetClamping/RepP1/AdamEPS1m12+LR1m3+GradClip5m1/TrainItPerCycle40/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1/Seed10_venv1_r2d2_her_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40'
      # LR 1e-4 : good bootstrap but oscillating... + Grad Clip 5m0
      # run-id: 'WithHERTargetClamping/RepP1/AdamEPS1m12+LR1m4+GradClip5m0/TrainItPerCycle40/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1/Seed10_venv1_r2d2_her_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40'
      # LR 2e-5 : GREAT bootstrap and no oscillation + Grad Clip 5m0
      run-id: 'WithHERTargetClamping/RepP1/AdamEPS1m12+LR2m5+GradClip5m0/TrainItPerCycle40/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1/Seed10_venv1_r2d2_her_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40'
      # LR 1e-4 + tau 4m4 : Weird... + Grad Clip 5m0
      # run-id: 'WithHERTargetClamping/RepP1/AdamEPS1m12+LR1m4+GradClip5m0+Tau4m4/TrainItPerCycle40/B96k_EpPerCycle16_MLP256_GSflat_GSNotShared_final-1/Seed10_venv1_r2d2_her_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40'

      # agent-id: '1step_double_HER_dqn_smallMLP_r1e5'
      # agent-id: '1step_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone'
      # NO Ent reg:
      # agent-id: '1step_0Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone'
      # RepP Actually !:
      # agent-id: '1step_0Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepActuallyP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone'

      # DQN:
      # agent-id: '1step_double_HER_dqnher_smallMLP_r1e5'
      # Similar R2D2:
      # agent-id: '1step_0Ent_r2d2_AdamLR2d0m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma98_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau2d5m2_RepActuallyP1_NOBURNIN_b128_L10_O5_B0_NOZeroInitSt_OnlineSt_StoreOnDone'
      agent-id: '1step_0Ent_r2d2_AdamLR2d0m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma98_smallMLP_GradClip5m0_r1p5Min3e4_a6m1_b1m0_ovrN_e9m1_tau2d5m2_RepActuallyP1_NOBURNIN_b128_L2_O0_B0_NOZeroInitSt_OnlineSt_StoreOnDone'

      # nbr_actor: 128
      # nbr_actor: 100
      # nbr_actor: 64
      # nbr_actor: 32
      # nbr_actor: 16
      # nbr_actor: 8
      nbr_actor: 1
      nbr_frame_skipping: 0
      nbr_frame_stacking: 1
      grayscale: false
      single_life_episode: false
      nbr_max_random_steps: 0
      clip_reward: true

      sad: false
      # sad: true
      vdn: false
      # vdn: true
      # otherplay: true
      otherplay: false

      previous_reward_action: false  # alternative: true
      # observation_resize_dim: (56,56)
      # The string 'None' (not YAML null), as the unquoted original resolved to.
      observation_resize_dim: 'None'
      goal_resize_dim: 'None'

      reload: 'None'

  experiment_id: 'r2d2_BitsSwap_Benchmark'
  # benchmarking_episodes: 1
  benchmarking_episodes: 10
  # The exponents below carry an explicit sign: without it, YAML 1.1 loaders
  # (e.g. PyYAML) resolve values such as `1.0e10` as strings, not floats.
  benchmarking_interval: 1.0e+10
  # benchmarking_interval: 1.0e+3
  benchmarking_record_episode_interval: 1.0e+8
  # benchmarking_record_episode_interval: 1.0e+1  # alternative: 1.0e+20
  train_observation_budget: 96.0e+3
  # train_observation_budget: 1.0e+7
  seed: 10

agents:    
    # Double-DQN agent with HER enabled, derived from the dqn_smallMLP preset;
    # the keys below override the merged-in values.
    1step_double_HER_dqnher_smallMLP_r1e5:
        <<: *dqn_smallMLP_obs84_graclip5m1_b32_tau1m2_lr25m5
        # Decimal point + signed exponent so that YAML 1.1 loaders (e.g.
        # PyYAML) resolve the value as a float rather than the string '1e5'.
        replay_capacity: 1.0e+5
        double: true
        # dueling: true
        # noisy: true
        n_step: 1
        use_HER: true

        gradient_clip: 5.0  # alternative: 0.5
        batch_size: 128  # alternative: 32
        tau: 2.5e-2  # alternative: 1.0e-2
        # tau: 4.0e-4
        # learning_rate: 2.5e-4
        # learning_rate: 1.0e-4
        learning_rate: 2.0e-5

        # adam_eps: 1.0e-8
        adam_eps: 1.0e-12

        replay_period: 1  # alternative: 240

        # Same values as in extra_hyperparameters, restated explicitly here.
        nbr_training_iteration_per_cycle: 40
        nbr_episode_per_cycle: 16



    # Double-DQN agent with HER and `noisy` enabled, derived from the
    # dqn_smallMLP preset; the keys below override the merged-in values.
    1step_noisy_double_HER_dqn_smallMLP_r1e5:
        <<: *dqn_smallMLP_obs84_graclip5m1_b32_tau1m2_lr25m5
        # Decimal point + signed exponent so that YAML 1.1 loaders (e.g.
        # PyYAML) resolve the value as a float rather than the string '1e5'.
        replay_capacity: 1.0e+5
        double: true
        # dueling: true
        noisy: true
        n_step: 1
        use_HER: true

    # 3-step R2D2 agent with HER, derived from the r2d2_smallMLP preset. It is
    # also anchored so that other agents can merge it as their base.
    3step_r2d2_AdamLR6d25m5_EPS1d5m5_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m2OVER3p4_gamma997_smallMLP_GradClip5m0_r5p4Min2e4_alpha9m1_beta6m1_overNone_eta9m1_tau4m4_RepP1_NOBURNIN_b32_L100_O0_B0_NOZeroInitSt_StoreOnDone: &3step_r2d2_AdamLR6d25m5_EPS1d5m5_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m2OVER3p4_gamma997_smallMLP_GradClip5m0_r5p4Min2e4_alpha9m1_beta6m1_overNone_eta9m1_tau4m4_RepP1_NOBURNIN_b32_L100_O0_B0_NOZeroInitSt_StoreOnDone
        <<: *r2d2_smallMLP_graclip5m1_b128_tau4m4_lr6p25m5_L40_O10_B0
        use_HER: true

        actor_models_update_steps_interval: 1  # considering only 1 actor's steps.

        vdn: false
        vdn_nbr_players: 2

        batch_size: 32
        learning_rate: 6.25e-5
        adam_eps: 1.5e-5
        discount: 0.997
        # Not specified in the R2D2 paper but in Ape-X; the R2D2 paper says
        # that missing hyper-parameters are the same as in Ape-X.
        gradient_clip: 5.0

        # Earlier settings, in terms of experiences:
        #   replay_capacity: 5e4  # alternatives: 163840, 2e13*20, 5242880, 1e6
        #   min_capacity: 2e4  # alternative: 1e4
        # Decimal point + signed exponent so that YAML 1.1 loaders (e.g.
        # PyYAML) resolve the value as a float rather than the string '1e5'.
        replay_capacity: 1.0e+5
        min_capacity: 128  # alternative: 1e4
        replay_period: 240

        PER_compute_initial_priority: false
        # The string 'None' (not YAML null), as the unquoted original
        # resolved to. Alternative: 2e5
        PER_beta_increase_interval: 'None'

        double: true
        dueling: true
        noisy: false
        n_step: 3
        tau: 4.0e-4

        sequence_replay_use_online_states: true
        sequence_replay_use_zero_initial_states: false
        sequence_replay_store_on_terminal: true

        r2d2_loss_masking: true
        r2d2_loss_masking_n_step_regularisation: true
        r2d2_bellman_target_SAD: false

        burn_in: false
        sequence_replay_unroll_length: 100
        sequence_replay_overlap_length: 0
        sequence_replay_burn_in_length: 0

        epsstart: 1.0
        epsend: 0.05
        epsdecay: 30000  # alternative: 1000000

        # Ape-X and R2D2 keep epsilon constant over each actor, with a
        # different value eps_i = base_eps**(1+\alpha*i/nbr_actors),
        # with base_eps=0.4 and \alpha = 7...
        eps_greedy_alpha: 7.0
    
    #1step_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
    #1step_0Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
    #1step_0Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepActuallyP1_NOBURNIN_b128_L2_O1_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
    
    #1step_0Ent_r2d2_AdamLR2d0m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma98_smallMLP_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau2d5m2_RepActuallyP1_NOBURNIN_b128_L10_O5_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
    #1step_0Ent_r2d2_AdamLR2d0m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma98_smallMLP_GradClip5m0_r1p5Min3e4_a6m1_b1m0_ovrN_e9m1_tau2d5m2_RepActuallyP1_NOBURNIN_b128_L10_O5_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
    # 1-step R2D2 agent without entropy regularisation, derived from the 3-step
    # R2D2 agent anchored earlier in this file; keys below override that base.
    1step_0Ent_r2d2_AdamLR2d0m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma98_smallMLP_GradClip5m0_r1p5Min3e4_a6m1_b1m0_ovrN_e9m1_tau2d5m2_RepActuallyP1_NOBURNIN_b128_L2_O0_B0_NOZeroInitSt_OnlineSt_StoreOnDone:
        <<: *3step_r2d2_AdamLR6d25m5_EPS1d5m5_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m2OVER3p4_gamma997_smallMLP_GradClip5m0_r5p4Min2e4_alpha9m1_beta6m1_overNone_eta9m1_tau4m4_RepP1_NOBURNIN_b32_L100_O0_B0_NOZeroInitSt_StoreOnDone
        weights_entropy_lambda: 0.0
        # weights_entropy_lambda: 0.1
        # weights_entropy_lambda: 0.001

        vdn: false
        # vdn: true
        vdn_nbr_players: 2
        sad: false
        # sad: true

        # Every other preset in this file sets the discount factor through
        # `discount`; with only `gamma` set, the `discount: 0.997` inherited
        # from the base agent stayed in effect although the agent id encodes
        # "gamma98". `discount` is therefore set explicitly; `gamma` is kept
        # in case a consumer reads it -- TODO confirm which key is read.
        discount: 0.98
        gamma: 0.98
        # learning_rate: 6.25e-5
        learning_rate: 2.0e-5
        gradient_clip: 5.0  # alternative: 0.5
        batch_size: 128  # alternative: 32
        tau: 2.5e-2  # alternative: 1.0e-2

        nbr_training_iteration_per_cycle: 40
        nbr_episode_per_cycle: 16
        replay_period: 1

        # adam_eps: 1.5e-5
        # learning_rate: 1.0e-3
        # adam_eps: 1.0e-8
        adam_eps: 1.0e-12
        # adam_eps: 1.0e-15

        n_step: 1
        # n_step: 3
        # n_step: 7

        # PER:
        # PER_alpha: 0.9
        # PER_beta: 0.6
        PER_alpha: 0.6
        PER_beta: 1.0

        burn_in: false
        # burn_in: true

        sequence_replay_unroll_length: 2  # alternative: 10
        sequence_replay_overlap_length: 0  # alternative: 5
        sequence_replay_burn_in_length: 0
        # sequence_replay_burn_in_length: 10

        # sequence_replay_unroll_length: 100
        # sequence_replay_overlap_length: 50
        # sequence_replay_burn_in_length: 0

        epsend: 0.4
        eps_greedy_alpha: 2.0

        # Architecture alternatives (all disabled):
        # critic_arch: 'LSTM-RNN'
        # critic_arch_hidden_units: [512, 512]
        # critic_arch_hidden_units: [512]
        # use_relu_after_rnn: False

        # normal arch:
        # critic_arch: 'MLP-LSTM-RNN'
        # use_relu_after_rnn: True
        # #use_relu_after_rnn: False
        # critic_arch_feature_dim: 512
        # critic_arch_hidden_units: [512]

        # Arch2:
        # critic_arch: 'MLP-LSTM-RNN2'
        # use_relu_after_rnn: False #True
        # use_residual_connection: True
        # critic_arch_linear_hidden_units: [512, 256]
        # critic_arch_feature_dim: 128
        # critic_arch_hidden_units: [128, 128]

        # Arch 3:
        # critic_arch: 'MLP-LSTM-RNN2'
        # use_relu_after_rnn: True
        # critic_arch_linear_hidden_units: [128]
        # critic_arch_feature_dim: 64
        # critic_arch_hidden_units: [64]

        # Arch 4:
        # critic_arch: 'MLP-LSTM-RNN2'
        # use_relu_after_rnn: True
        # critic_arch_linear_hidden_units: [512, 256]
        # critic_arch_hidden_units: [256]
        # critic_arch_linear_post_hidden_units: [256]
        # critic_arch_feature_dim: 128

        # extra_inputs_infos: {
        #     'previous_reward':{
        #         shape: [1,],
        #         target_location: ['critic_body', 'extra_inputs']
        #     },
        #     'previous_action':{
        #         shape: ['task.action_dim',],
        #         target_location: ['critic_body', 'extra_inputs']
        #     },
        #     # 'action_mask':{
        #     #     shape: ['task.action_dim',],
        #     #     target_location: ['critic_body', 'extra_inputs']
        #     # },
        #     ########################
        #     ########################
        #     ########################
        #     # WITH SAD:
        #     ########################
        #     # 'greedy_action':{
        #     #     #shape: [23],
        #     #     shape: [108],
        #     #     #shape: [43],
        #     #     #shape: [58,],
        #     #     target_location: ['critic_body', 'extra_inputs']
        #     # },
        #     ########################
        #     ########################
        #     # 'legal_actions':{
        #     #     shape: ['task.action_dim',],
        #     #     target_location: ['head', 'extra_inputs']
        #     # },
        #
        # }

    
    
    
    