# Weights & Biases sweep definition.
# W&B project that collects the runs of this sweep.
project: ETHER
# W&B entity (user or team) that owns the project.
entity: kdenamganai0856
# Script launched for every run; substituted for ${program} in `command`.
program: benchmark_wandb_ether.py
# Command line each agent executes; the ${...} macros are expanded by W&B.
command:
  # Expands to /usr/bin/env on Unix systems.
  - ${env}
  # NOTE(review): absolute, machine-specific virtualenv interpreter; every
  # agent host needs this exact path — confirm, or consider ${interpreter}.
  - /home/kdena/.virtualenvs/ilela_venv/bin/python
  # The script named by the top-level `program` key.
  - ${program}
  # The hyperparameters chosen for the run, rendered as --name=value arguments.
  - ${args}
# Grid search: one run per combination of the values listed under `parameters`.
method: grid
# Metric recorded as the sweep objective (higher is better).
# NOTE(review): "comm_roun0" may be a typo for "comm_round0" — confirm against
# the exact key the program logs before changing it.
metric:
  name: PerEpoch/test/repetition0/comm_roun0/referential_game_accuracy/Mean
  goal: maximize
# Search space. Every entry lists its candidate values; an entry with a single
# value is held constant across the sweep.
parameters:
        # Single seed: not swept.
        seed:
          values: [20]

        # Single configuration file name, forwarded to the program as --config=...
        config:
          values: [miniworld_wandb_benchmark_ETHER_config.yaml]

        # General training settings; held constant unless noted.
        n_step:
          values: [3]
        nbr_actor:
          values: [2]
        eps_greedy_alpha:
          values: [2.0]

        nbr_minibatches:
          values: [1]
        # Only swept axis in this group.
        batch_size:
          values: [64, 128]

        # Capacities are spelled as plain integers. Exponent notation without a
        # decimal point and a signed exponent (e.g. 5e3) is not recognised as a
        # number by YAML 1.1 loaders such as PyYAML and is loaded as a string.
        min_capacity:
          values: [5000]
        replay_capacity:
          values: [6000]
        # Has both a decimal point and a signed exponent, so it loads as a float.
        learning_rate:
          values: [6.25e-5]

        # Sequence-replay and optimiser settings; every entry in this group is
        # held constant (single value).
        sequence_replay_burn_in_ratio:
          values: [0.0]
        weights_entropy_lambda:
          values: [0.0]

        sequence_replay_unroll_length:
          values: [20]
        sequence_replay_overlap_length:
          values: [10]

        sequence_replay_use_online_states:
          values: [true]
        sequence_replay_use_zero_initial_states:
          values: [false]

        sequence_replay_store_on_terminal:
          values: [false]
        HER_target_clamping:
          values: [false]

        adam_weight_decay:
          values: [0.0]
        ther_adam_weight_decay:
          values: [0.0]

        nbr_training_iteration_per_cycle:
          values: [40]
        nbr_episode_per_cycle:
          values: [16]

        # ETHER / THER feature switches. Entries with more than one value are
        # swept axes; the rest are held constant.
        ETHER_use_ETHER:
          values: [true]
        THER_use_THER:
          values: [true]

        # Swept: both settings are tried.
        ETHER_rg_shared_architecture:
          values: [true, false]

        # Written as an unquoted boolean, like every other flag in this file, so
        # that it loads as a boolean rather than the string 'True'.
        ETHER_rg_with_logits_mdl_principle:
          values: [true]
        # Swept: three factors.
        ETHER_rg_logits_mdl_principle_factor:
          values: [1.0e-1, 1.0e-3, 1.0e-6]

        # Swept: two loss types (quoted because they are literal strings).
        ETHER_rg_agent_loss_type:
          values: ['Impatient+Hinge', 'Hinge']

        ETHER_use_supervised_training:
          values: [false]

        ETHER_lock_test_storage:
          values: [true]
        ETHER_rg_filter_out_non_unique:
          values: [false]

        ETHER_rg_with_color_jitter_augmentation:
          values: [false]
        # Swept: with and without the augmentation.
        ETHER_rg_with_gaussian_blur_augmentation:
          values: [true, false]

        # Swept: both settings are tried.
        ETHER_rg_egocentric:
          values: [true, false]

        # ETHER numeric settings; held constant unless noted.
        ETHER_rg_object_centric_version:
          values: [2]
        ETHER_rg_descriptive_version:
          values: [1]

        ETHER_rg_learning_rate:
          values: [6.25e-5]
        ETHER_rg_weight_decay:
          values: [0.0]

        ETHER_rg_vocab_size:
          values: [64]
        ETHER_rg_training_period:
          values: [4096]

        ETHER_rg_descriptive:
          values: [false]
        ETHER_rg_use_curriculum_nbr_distractors:
          values: [false]

        ETHER_rg_nbr_epoch_per_update:
          values: [512]
        # NOTE(review): this threshold is 99 whereas
        # THER_predictor_accuracy_threshold is 0.95 — presumably percent versus
        # fraction; confirm against the program.
        ETHER_rg_accuracy_threshold:
          values: [99]

        ETHER_rg_nbr_train_distractors:
          values: [7]
        ETHER_rg_nbr_test_distractors:
          values: [7]

        ETHER_replay_capacity:
          values: [1024]
        ETHER_test_replay_capacity:
          values: [512]

        ETHER_train_dataset_length:
          values: [1024]
        ETHER_test_dataset_length:
          values: [512]

        # Swept: two sampling schemes.
        ETHER_rg_distractor_sampling:
          values: [uniform, similarity-90]

        # THER storage and predictor settings; every entry in this group is held
        # constant (single value).
        THER_use_PER:
          values: [true]
        THER_observe_achieved_goal:
          values: [false]

        THER_lock_test_storage:
          values: [true]

        THER_feedbacks_failure_reward:
          values: [0]
        THER_feedbacks_success_reward:
          values: [1]

        THER_episode_length_reward_shaping:
          values: [true]

        # Capacities are spelled as plain integers. Exponent notation without a
        # decimal point and a signed exponent (e.g. 1e2) is not recognised as a
        # number by YAML 1.1 loaders such as PyYAML and is loaded as a string.
        THER_replay_capacity:
          values: [100]
        THER_min_capacity:
          values: [12]

        THER_predictor_nbr_minibatches:
          values: [1]
        THER_predictor_batch_size:
          values: [32]

        THER_predictor_test_train_split_interval:
          values: [5]
        # Plain integer for the same reason as THER_replay_capacity.
        THER_test_replay_capacity:
          values: [100]

        THER_test_min_capacity:
          values: [4]
        THER_replay_period:
          values: [6144]

        # Remaining THER and environment settings; every entry in this group is
        # held constant (single value).
        THER_train_on_success:
          values: [false]
        THER_nbr_training_iteration_per_update:
          values: [128]

        THER_predict_PADs:
          values: [false]
        THER_predictor_accuracy_threshold:
          values: [0.95]

        THER_predictor_accuracy_safe_to_relabel_threshold:
          values: [0.2]
        THER_filter_predicate_fn:
          values: [true]

        THER_relabel_terminal:
          values: [false]
        THER_filter_out_timed_out_episode:
          values: [false]

        THER_train_contrastively:
          values: [false]
        THER_contrastive_training_nbr_neg_examples:
          values: [0]

        single_pick_episode:
          values: [false]
        THER_timing_out_episode_length_threshold:
          values: [40]

        BabyAI_Bot_action_override:
          values: [false]
        train_observation_budget:
          values: [6144]

