# Shared settings, anchored so that other mappings in this file can pull them
# in with a merge key (`<<`) referencing *extra_hyperparameters.
extra_hyperparameters: &extra_hyperparameters
  lr_account_for_nbr_actor: false
  weights_decay_lambda: 1.0
  use_target_to_gather_data: false
  # Goal-related switches.
  goal_oriented: true
  goal_state_shared_arch: false
  goal_state_flattening: true
  # Per-cycle counts.
  nbr_training_iteration_per_cycle: 40
  nbr_episode_per_cycle: 16
  HER_use_latent: true

# Network architecture preset, anchored as *LargeCNN for reuse through merge
# keys. The architecture selectors hold the literal string 'None' (not a YAML
# null) where no architecture is chosen.
LargeCNN: &LargeCNN
  phi_arch: 'CNN'
  critic_arch: 'None'

  goal_phi_arch: 'None'

  # Phi body: one entry per layer in each of the four lists below.
  phi_arch_channels: [32, 64, 64]
  phi_arch_kernels: [8, 4, 3]
  phi_arch_strides: [4, 2, 1]
  phi_arch_paddings: [1, 1, 1]
  phi_arch_feature_dim: 512
  phi_arch_hidden_units: [512]

  # Actor head: no hidden layers listed.
  actor_arch_hidden_units: []
  # Critic head: no hidden layers listed.
  critic_arch_hidden_units: []

  # Goal phi body: same layer values as the phi body above.
  goal_phi_arch_channels: [32, 64, 64]
  goal_phi_arch_kernels: [8, 4, 3]
  goal_phi_arch_strides: [4, 2, 1]
  goal_phi_arch_paddings: [1, 1, 1]
  goal_phi_arch_feature_dim: 512
  goal_phi_arch_hidden_units: [512]

  # Goal critic head: no hidden layers listed.
  goal_critic_arch_hidden_units: []


# Network architecture preset, anchored as *smallMLP. It carries the same keys
# as the LargeCNN preset but selects 'MLP' and uses 256-wide feature/hidden
# sizes.
# NOTE(review): the channel/kernel/stride/padding lists are kept even though
# phi_arch is 'MLP' -- presumably ignored by the consumer; confirm.
smallMLP: &smallMLP
  phi_arch: 'MLP'
  critic_arch: 'None'

  goal_phi_arch: 'None'

  # Phi body.
  phi_arch_channels: [32, 64, 64]
  phi_arch_kernels: [8, 4, 3]
  phi_arch_strides: [4, 2, 1]
  phi_arch_paddings: [1, 1, 1]
  phi_arch_feature_dim: 256
  phi_arch_hidden_units: [256]

  # Actor head: no hidden layers listed.
  actor_arch_hidden_units: []
  # Critic head: no hidden layers listed.
  critic_arch_hidden_units: []

  # Goal phi body: same layer values as the phi body above.
  goal_phi_arch_channels: [32, 64, 64]
  goal_phi_arch_kernels: [8, 4, 3]
  goal_phi_arch_strides: [4, 2, 1]
  goal_phi_arch_paddings: [1, 1, 1]
  goal_phi_arch_feature_dim: 256
  goal_phi_arch_hidden_units: [256]

  # Goal critic head: no hidden layers listed.
  goal_critic_arch_hidden_units: []

        
# DQN preset combining the LargeCNN architecture with the shared extra
# hyperparameters. Trailing `#` values on some lines are alternative values
# left by the author.
dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5: &dqn_LargeCNN_obs84_graclip5m1_b32_tau1m2_lr25m5
  # A mapping may contain the merge key `<<` only once, so both presets are
  # merged through a single list. Earlier list entries take precedence on a
  # key conflict; keys written explicitly below override anything merged in.
  <<: [*extra_hyperparameters, *LargeCNN]

  # Algorithm variant switches.
  double: false
  dueling: false
  noisy: false
  n_step: 1

  # PER settings.
  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Replay settings.
  # NOTE(review): `1e6` / `1e4` have no decimal point or exponent sign; YAML 1.1
  # loaders (e.g. PyYAML) read them as strings -- confirm the consumer converts.
  replay_capacity: 1e6
  min_capacity: 1e4
  replay_period: 1

  # HER settings.
  use_HER: false
  HER_strategy: 'future-4'

  # The literal string 'None' (a bare None is a string in YAML, not a null).
  observation_resize_dim: 'None'
  goal_resize_dim: 'None'

  # Optimisation settings.
  discount: 0.99
  use_cuda: true
  gradient_clip: 0.5
  batch_size: 32
  tau: 1.0e-2
  learning_rate: 2.5e-4
  adam_eps: 1.0e-8

  # eps* schedule.
  epsstart: 1.0
  epsend: 0.01  # 0.1
  epsdecay: 3000  # 1000000


# DQN preset combining the LargeCNN architecture with the shared extra
# hyperparameters; anchored as *dqn_LargeCNN for reuse through merge keys.
# Trailing `#` values on some lines are alternative values left by the author.
dqn_LargeCNN: &dqn_LargeCNN
  # A mapping may contain the merge key `<<` only once, so both presets are
  # merged through a single list. Earlier list entries take precedence on a
  # key conflict; keys written explicitly below override anything merged in.
  <<: [*extra_hyperparameters, *LargeCNN]

  # Algorithm variant switches.
  double: false
  dueling: false
  noisy: false
  n_step: 1

  # PER settings.
  use_PER: false
  PER_alpha: 0.6
  PER_beta: 1.0

  # Replay settings.
  # NOTE(review): `1e6` has no decimal point or exponent sign; YAML 1.1 loaders
  # (e.g. PyYAML) read it as a string -- confirm the consumer converts it.
  replay_capacity: 1e6
  min_capacity: 128  # 1e4
  replay_period: 240  # 240

  # HER settings.
  use_HER: false
  HER_strategy: 'future-1'

  # The literal string 'None' (a bare None is a string in YAML, not a null).
  observation_resize_dim: 'None'
  goal_resize_dim: 'None'

  # Optimisation settings.
  discount: 0.98  # 0.99
  use_cuda: true
  gradient_clip: 0.0  # 0.5
  batch_size: 128  # 32
  tau: 2.5e-2  # 1.0e-2
  learning_rate: 3.0e-4  # 2.5e-4
  adam_eps: 1.0e-8

  # eps* schedule.
  epsstart: 1.0
  epsend: 0.02  # 0.1
  epsdecay: 500  # 1000000
  epsdecay_strategy: 'None'

# Experiment definition: a list of tasks plus run-wide settings.
experiment:
  tasks:
    # Single task entry. Its agent-id names one of the keys under `agents`.
    - env-id: '3BitsSwap-HardMNIST-v0'

      run-id: 'B120k_EpPerCycle16_MLP256_GSflat_GSNotShared_future-1_lr3m4/Seed10_venv_dqn_Max+Sk0_St1_ObsNone_ClipReward_Eps5p2End2m2_tau40_GradClip0'
      # Alternative agent ids, currently disabled:
      # agent-id: '1step_double_prHER_dqn_LargeCNN_r1e5'
      # agent-id: '1step_noisy_double_HER_dqn_LargeCNN_r1e5'
      agent-id: '1step_noisy_prioritized_double_HER_dqn_LargeCNN_r1e5'

      nbr_actor: 1
      nbr_frame_skipping: 0
      nbr_frame_stacking: 1
      grayscale: false
      single_life_episode: false
      nbr_max_random_steps: 0
      clip_reward: true
      # The literal string 'None' (a bare None is a string in YAML, not a null).
      observation_resize_dim: 'None'
      goal_resize_dim: 'None'
  experiment_id: 'BitsSwap_Benchmark'
  benchmarking_episodes: 1
  # NOTE(review): exponents without a sign (`e10`, `e8`, `e3`) are read as
  # strings by YAML 1.1 loaders such as PyYAML -- confirm the consumer converts.
  benchmarking_interval: 1.0e10
  benchmarking_record_episode_interval: 1.0e8
  train_observation_budget: 120.0e3
  seed: 10

# Agent definitions. Each one starts from the *dqn_LargeCNN preset and then
# overrides individual keys; explicit keys win over merged ones.
# NOTE(review): `1e5` has no decimal point or exponent sign; YAML 1.1 loaders
# (e.g. PyYAML) read it as a string -- confirm the consumer converts it.
agents:
  # Overrides: double + HER, smaller replay capacity.
  1step_double_HER_dqn_LargeCNN_r1e5:
    <<: *dqn_LargeCNN
    replay_capacity: 1e5
    double: true
    # dueling: true
    # noisy: true
    n_step: 1
    use_HER: true

  # Same as above with noisy enabled as well.
  1step_noisy_double_HER_dqn_LargeCNN_r1e5:
    <<: *dqn_LargeCNN
    replay_capacity: 1e5
    double: true
    # dueling: true
    noisy: true
    n_step: 1
    use_HER: true

  # Same as above with PER enabled and its alpha/beta overridden.
  1step_noisy_prioritized_double_HER_dqn_LargeCNN_r1e5:
    <<: *dqn_LargeCNN
    replay_capacity: 1e5
    double: true
    # dueling: true
    noisy: true
    n_step: 1
    use_HER: true
    use_PER: true
    PER_alpha: 0.7
    PER_beta: 0.4
