# Sweep destination and the script every run launches (the key names follow
# the Weights & Biases sweep schema — confirm against the tool in use).
project: EReLELA-MultiRoom-Benchmark
entity: near3213
program: ../benchmark_wandb_erelela.py
# Launch line for each run. The ${...} entries are placeholders filled in by
# the sweep agent; the interpreter path is relative to the agent's working
# directory.
command:
    - "${env}"
    - "./venv/bin/python"
    - "${program}"
    - "${args}"
# Search strategy: `bayes` method with Hyperband-type early termination.
method: bayes
early_terminate:
    type: hyperband
    eta: 3
    s: 3
    max_iter: 10000
    strict: true
# Objective: the logged metric below is maximized.
metric:
    goal: maximize
    name: PerEpisode/Metrics/coverage_count
# Every parameter lists its candidates under `values`; a one-element list
# pins the parameter, a longer list makes it a searched dimension.
parameters:
    # Three candidate values.
    seed:
        values: [10, 20, 30]

    # Pinned.
    env_seed:
        values: [12]
    static_envs:
        values: [false]

    with_early_stopping:
        values: [false]

    use_cuda:
        values: [true]

    success_threshold:
        values: [0.01]

    # Two candidate configuration files.
    config:
        values:
            - multiroom_N7_S4_minigrid_wandb_benchmark_POMDPERELELA_config.yaml
            - multiroom_N7_S4_minigrid_wandb_benchmark_AgnosticPOMDPERELELA_config.yaml

    # language_guided_curiosity*: every entry in this group is pinned to a
    # single value; the master flag itself is false.
    language_guided_curiosity:
        values: [false]

    language_guided_curiosity_descr_type:
        values: ["descr"]

    language_guided_curiosity_extrinsic_weight:
        values: [10.0]

    language_guided_curiosity_intrinsic_weight:
        values: [0.1]

    language_guided_curiosity_binary_reward:
        values: [false]

    language_guided_curiosity_densify:
        values: [false]

    language_guided_curiosity_non_episodic_dampening_rate:
        values: [0.0]

    # Pinned to true.
    coverage_manipulation_metric:
        values: [true]

    # MiniWorld_entity_visibility_oracle*: all pinned; the oracle flag itself
    # is false.
    MiniWorld_entity_visibility_oracle:
        values: [false]

    MiniWorld_entity_visibility_oracle_language_specs:
        values: ["none"]

    MiniWorld_entity_visibility_oracle_too_far_threshold:
        values: [-1.0]

    MiniWorld_entity_visibility_oracle_include_discrete_depth:
        values: [true]

    MiniWorld_entity_visibility_oracle_include_depth_precision:
        values: [-1]

    MiniWorld_entity_visibility_oracle_top_view:
        values: [false]

    # PER_* settings (presumably prioritized experience replay — confirm);
    # all pinned to a single value.
    PER_alpha:
        values: [0.5]
    PER_beta:
        values: [1.0]

    PER_use_rewards_in_priority:
        values: [false]

    sequence_replay_PER_eta:
        values: [0.9]

    PER_compute_initial_priority:
        values: [true]

    # Feature toggles, all pinned. Most `use_X` flags have a prefixed
    # `X_use_X` twin; the pairs are kept side by side here. How the two flags
    # interact is decided by the launched program and is not visible here.
    use_ETHER:
        values: [false]
    ETHER_use_ETHER:
        values: [false]

    use_THER:
        values: [false]
    THER_use_THER:
        values: [false]

    # NOTE(review): use_RP is false while RP_use_RP is true; the other pairs
    # agree with each other — confirm this mismatch is intentional.
    use_RP:
        values: [false]
    RP_use_RP:
        values: [true]

    use_ELA:
        values: [true]
    ELA_use_ELA:
        values: [true]

    use_HER:
        values: [false]
    goal_oriented:
        values: [false]

    # ELA_rg_* training, obverter and metric settings; every entry in this
    # group is pinned to a single value.
    ELA_with_rg_training:
        values: [true]

    ELA_rg_use_cuda:
        values: [true]

    ELA_rg_graphtype:
        values: ["straight_through_gumbel_softmax"]

    ELA_rg_obverter_threshold_to_stop_message_generation:
        values: [0.9]

    ELA_rg_obverter_nbr_games_per_round:
        values: [32]

    ELA_rg_obverter_sampling_round_alternation_only:
        values: [false]
    ELA_rg_use_obverter_sampling:
        values: [false]

    # A single '+'-separated string, passed through as one value.
    ELA_rg_compactness_ambiguity_metric_language_specs:
        values: ["emergent+natural+color+shape+shuffled-emergent+shuffled-natural+shuffled-color+shuffled-shape"]

    ELA_rg_sanity_check_compactness_ambiguity_metric:
        values: [false]

    # Searched: both settings.
    ELA_rg_shared_architecture:
        values: [true, false]

    # Searched: four candidate factors.
    ELA_rg_logits_mdl_principle_factor:
        values: [0.0, 1.0e-2, 1.0e-3, 1.0e-4]

    # Searched: three candidate thresholds.
    ELA_rg_logits_mdl_principle_accuracy_threshold:
        values: [20.0, 50.0, 60.0]

    # Searched: two candidate loss types.
    ELA_rg_agent_loss_type:
        values: ["Impatient+Hinge", "Hinge"]

    # Pinned.
    ELA_rg_use_semantic_cooccurrence_grounding:
        values: [false]

    ELA_rg_semantic_cooccurrence_grounding_lambda:
        values: [1.0]

    ELA_rg_semantic_cooccurrence_grounding_noise_magnitude:
        values: [0.2]

    ELA_lock_test_storage:
        values: [false]

    ELA_rg_color_jitter_prob:
        values: [0.0]

    # Searched: two candidate probabilities.
    ELA_rg_gaussian_blur_prob:
        values: [0.5, 1.0]

    # Pinned.
    ELA_rg_egocentric_prob:
        values: [0.0]

    ELA_rg_object_centric_version:
        values: [2]
    ELA_rg_descriptive_version:
        values: [1]

    # Exponent literals are written with a decimal point and a signed
    # exponent. YAML 1.1 loaders (e.g. PyYAML) resolve forms such as `3e-4`
    # as strings, which would mix strings and floats in one value list.
    # Searched: three candidate learning rates.
    ELA_rg_learning_rate:
        values: [3.0e-4, 1.0e-3, 6.25e-5]
    ELA_rg_weight_decay:
        values: [0.0]

    # Searched: three candidate L1 weight-decay values.
    ELA_rg_l1_weight_decay:
        values: [0.0, 1.0e-3, 1.0e-4]
    ELA_rg_l2_weight_decay:
        values: [0.0]

    # Searched: two candidates each.
    ELA_rg_vocab_size:
        values: [32, 64]
    ELA_rg_max_sentence_length:
        values: [64, 128]

    # Pinned.
    ELA_rg_training_period:
        values: [32768]

    ELA_rg_descriptive:
        values: [true]
    ELA_rg_use_curriculum_nbr_distractors:
        values: [false]

    ELA_rg_nbr_epoch_per_update:
        values: [32]
    # Searched: four candidate thresholds.
    ELA_rg_accuracy_threshold:
        values: [50, 60, 65, 70]

    # Searched over four train-distractor counts; the test count is pinned.
    ELA_rg_nbr_train_distractors:
        values: [0, 1, 2, 3]
    ELA_rg_nbr_test_distractors:
        values: [3]

    # Pinned.
    ELA_replay_capacity:
        values: [8192]
    ELA_test_replay_capacity:
        values: [2048]

    ELA_rg_distractor_sampling:
        values: ["uniform"]

    # ELA reward weights and feedback rewards; all pinned.
    ELA_reward_extrinsic_weight:
        values: [10.0]
    ELA_reward_intrinsic_weight:
        values: [0.1]

    # NOTE(review): the success reward is the integer 1 while the failure
    # reward is the float 0.0 — confirm the consumer accepts both forms.
    ELA_feedbacks_failure_reward:
        values: [0.0]
    ELA_feedbacks_success_reward:
        values: [1]

    # Everything in this group is pinned to a single value.
    BabyAI_Bot_action_override:
        values: [false]

    n_step:
        values: [3]
    nbr_actor:
        values: [32]

    # eps* entries (presumably the epsilon-greedy schedule — confirm).
    epsstart:
        values: [1.0]
    epsend:
        values: [0.1]

    epsdecay:
        values: [100000]
    eps_greedy_alpha:
        values: [2.0]

    nbr_minibatches:
        values: [1]
    batch_size:
        values: [64]

    # Exponent literals are written with a decimal point and a signed
    # exponent. YAML 1.1 loaders (e.g. PyYAML) resolve forms such as `4e3`
    # as strings, whereas `6.25e-5` below is already read as a float.
    min_capacity:
        values: [4.0e+3]
    # Searched: two candidates.
    min_handled_experiences:
        values: [28.0e+3, 4.0e+3]
    replay_capacity:
        values: [20.0e+3]
    learning_rate:
        values: [6.25e-5]

    # sequence_replay_* and related training-cycle settings; all pinned.
    sequence_replay_burn_in_ratio:
        values: [0.5]
    weights_entropy_lambda:
        values: [0.0]

    sequence_replay_unroll_length:
        values: [20]
    sequence_replay_overlap_length:
        values: [10]

    sequence_replay_use_online_states:
        values: [true]
    sequence_replay_use_zero_initial_states:
        values: [false]

    sequence_replay_store_on_terminal:
        values: [false]
    HER_target_clamping:
        values: [false]

    adam_weight_decay:
        values: [0.0]
    ther_adam_weight_decay:
        values: [0.0]

    nbr_training_iteration_per_cycle:
        values: [2]
    nbr_episode_per_cycle:
        values: [0]

    # Episode/environment flags and the recording interval; all pinned.
    single_pick_episode:
        values: [false]

    terminate_on_completion:
        values: [true]

    allow_carrying:
        values: [false]

    # NOTE(review): 0 may mean "no limit" — confirm in the launched program.
    time_limit:
        values: [0]

    benchmarking_record_episode_interval:
        values: [400]

    # The exponent carries an explicit sign. YAML 1.1 loaders (e.g. PyYAML)
    # only resolve a scalar as a float when the exponent is signed, so
    # `1.0e7` would be read as a string.
    benchmarking_interval:
        values: [1.0e+7]

    train_observation_budget:
        values: [1.0e+6]

 
