# Weights & Biases sweep definition: where runs are logged, how each run is
# launched, and which logged metric the search strategy optimises.
project: EReLELA
entity: kdenamganai0856
program: benchmark_wandb_ether_rppo.py

# Launch command for every run. ${env}, ${program} and ${args} are
# placeholders expanded by the sweep agent; ${program} resolves to the
# `program` key above.
# NOTE(review): the interpreter is an absolute, user-specific virtualenv
# path, so this sweep only launches on a machine where that path exists.
command:
  - ${env}
  - /home/kdena/.virtualenvs/erelela_venv/bin/python
  - ${program}
  - ${args}

# Search strategy and the objective it drives.
method: bayes
metric:
  name: Wrappers/LanguageGuidedCuriosity/CoverageRatio
  goal: maximize
# Search space. Every entry is a discrete `values` list; entries with a single
# value are effectively pinned, entries marked "swept" list several candidates.
parameters:
  # --- Experiment setup -----------------------------------------------------
  seed:
    values: [20]
  config:
    values: [maze_miniworld_wandb_benchmark_ETHER+RPPO+RP_config.yaml]
  language_guided_curiosity:
    values: [true]
  MiniWorld_entity_visibility_oracle:
    values: [true]
  MiniWorld_entity_visibility_oracle_top_view:
    values: [false]

  # --- Component toggles ----------------------------------------------------
  # Only the RP flags are enabled; the ETHER/THER/HER flags are all false.
  # NOTE(review): the ETHER_* and THER_* settings further down are presumably
  # inert while these flags are false - confirm against the training script.
  use_ETHER:
    values: [false]
  use_THER:
    values: [false]
  use_RP:
    values: [true]
  RP_use_RP:
    values: [true]
  use_HER:
    values: [false]
  goal_oriented:
    values: [false]
  ETHER_use_ETHER:
    values: [false]
  THER_use_THER:
    values: [false]

  # --- ETHER_* settings (all pinned) ----------------------------------------
  ETHER_rg_sanity_check_compactness_ambiguity_metric:
    values: [false]
  ETHER_rg_shared_architecture:
    values: [false]
  ETHER_rg_with_logits_mdl_principle:
    values: [true]
  ETHER_rg_logits_mdl_principle_factor:
    values: [1.0e-3]
  ETHER_rg_logits_mdl_principle_accuracy_threshold:
    values: [10.0]
  ETHER_rg_agent_loss_type:
    values: [Impatient+Hinge]
  ETHER_use_supervised_training:
    values: [false]
  ETHER_rg_use_semantic_cooccurrence_grounding:
    values: [true]
  ETHER_rg_semantic_cooccurrence_grounding_lambda:
    values: [1.0]
  ETHER_rg_semantic_cooccurrence_grounding_noise_magnitude:
    values: [0.2]
  ETHER_lock_test_storage:
    values: [true]
  ETHER_rg_filter_out_non_unique:
    values: [false]
  ETHER_rg_with_color_jitter_augmentation:
    values: [false]
  ETHER_rg_with_gaussian_blur_augmentation:
    values: [true]
  ETHER_rg_egocentric:
    values: [false]
  ETHER_rg_object_centric_version:
    values: [2]
  ETHER_rg_descriptive_version:
    values: [1]
  ETHER_rg_learning_rate:
    values: [6.25e-5]
  ETHER_rg_weight_decay:
    values: [0.0]
  ETHER_rg_vocab_size:
    values: [64]
  ETHER_rg_training_period:
    values: [4096]
  ETHER_rg_descriptive:
    values: [false]
  ETHER_rg_use_curriculum_nbr_distractors:
    values: [false]
  ETHER_rg_nbr_epoch_per_update:
    values: [1]
  ETHER_rg_accuracy_threshold:
    values: [99]
  ETHER_rg_nbr_train_distractors:
    values: [7]
  ETHER_rg_nbr_test_distractors:
    values: [7]
  ETHER_replay_capacity:
    values: [2048]
  ETHER_test_replay_capacity:
    values: [512]
  ETHER_rg_distractor_sampling:
    values: [similarity-90]

  # --- RP_* settings --------------------------------------------------------
  RP_use_PER:
    values: [true]
  RP_lock_test_storage:
    values: [false]
  RP_replay_capacity:
    values: [2048]
  RP_min_capacity:
    values: [32]
  # swept
  RP_predictor_nbr_minibatches:
    values: [4, 8]
  RP_predictor_batch_size:
    values: [256]
  RP_predictor_test_train_split_interval:
    values: [3]
  RP_test_replay_capacity:
    values: [512]
  RP_test_min_capacity:
    values: [32]
  # swept
  RP_replay_period:
    values: [4096, 8192]
  # swept
  RP_nbr_training_iteration_per_update:
    values: [4, 8]
  RP_predictor_accuracy_threshold:
    values: [90]

  # --- THER_* settings (all pinned) -----------------------------------------
  THER_use_PER:
    values: [true]
  THER_observe_achieved_goal:
    values: [false]
  THER_lock_test_storage:
    values: [true]
  THER_feedbacks_failure_reward:
    values: [0]
  THER_feedbacks_success_reward:
    values: [1]
  THER_episode_length_reward_shaping:
    values: [true]
  # NOTE(review): `1e2` has no decimal point or exponent sign, so YAML 1.1
  # loaders (e.g. PyYAML) read it as the string "1e2" rather than a number.
  # Left untouched; confirm the consumer converts it as intended.
  THER_replay_capacity:
    values: [1e2]
  THER_min_capacity:
    values: [4]
  THER_predictor_nbr_minibatches:
    values: [1]
  THER_predictor_batch_size:
    values: [32]
  THER_predictor_test_train_split_interval:
    values: [5]
  # NOTE(review): same `1e2` spelling caveat as THER_replay_capacity.
  THER_test_replay_capacity:
    values: [1e2]
  THER_test_min_capacity:
    values: [4]
  # NOTE(review): 1028 rather than 1024 - confirm this is intentional.
  THER_replay_period:
    values: [1028]
  THER_train_on_success:
    values: [false]
  THER_nbr_training_iteration_per_update:
    values: [128]
  THER_predict_PADs:
    values: [false]
  THER_predictor_accuracy_threshold:
    values: [0.95]
  THER_predictor_accuracy_safe_to_relabel_threshold:
    values: [0.2]
  THER_filter_predicate_fn:
    values: [true]
  THER_relabel_terminal:
    values: [false]
  THER_filter_out_timed_out_episode:
    values: [true]
  THER_train_contrastively:
    values: [false]
  THER_contrastive_training_nbr_neg_examples:
    values: [0]
  single_pick_episode:
    values: [true]
  THER_timing_out_episode_length_threshold:
    values: [400]
  BabyAI_Bot_action_override:
    values: [false]

  # --- Optimiser / PPO hyperparameters --------------------------------------
  # swept
  adam_eps:
    values: [1.0e-12, 1.0e-5]
  # swept
  optimization_epochs:
    values: [4, 8]
  standardized_adv:
    values: [true]
  # swept
  discount:
    values: [0.999, 0.99]
  # swept
  ppo_ratio_clip:
    values: [0.1, 0.2]
  # swept
  nbr_actor:
    values: [8, 32, 64]
  mini_batch_size:
    values: [256]
  batch_size:
    values: [256]
  # swept
  learning_rate:
    values: [2.5e-4, 6.25e-5]
  # swept
  gradient_clip:
    values: [0.5, 5.0]
  # swept
  entropy_weight:
    values: [0.1, 0.01, 0.001]

  # --- Sequence replay ------------------------------------------------------
  sequence_replay_store_on_terminal:
    values: [false]
  # swept
  sequence_replay_burn_in_ratio:
    values: [0.0, 0.5]
  # swept
  sequence_replay_unroll_length:
    values: [8, 32]
  sequence_replay_use_online_states:
    values: [true]
  # swept
  sequence_replay_use_zero_initial_states:
    values: [false, true]

  # --- Remaining pinned settings --------------------------------------------
  adam_weight_decay:
    values: [0.0]
  time_limit:
    values: [100]
  # NOTE(review): `4.0e5` has no sign in its exponent, so YAML 1.1 loaders
  # (e.g. PyYAML) read it as the string "4.0e5" rather than a float. Left
  # untouched; confirm the consumer converts it as intended.
  train_observation_budget:
    values: [4.0e5]


