---
# Weights & Biases sweep definition.
# `project` / `entity` select where the sweep's runs are recorded;
# `program` is the script substituted for ${program} in `command`.
project: THER
entity: near3213
program: benchmark_wandb_ther.py
# Command template launched for every run of the sweep.
# Each list item is handed over as ONE argument, so the environment
# assignment and the interpreter path must be separate items; otherwise
# `env` treats "NAME=VALUE /path/to/python" as a single assignment whose
# value contains the interpreter path, and python itself is never invoked.
# Resulting invocation: env WANDB_CACHE_DIR=... <python> <program> <args>
command:
  - ${env}
  - WANDB_CACHE_DIR=./wandb_cache/
  - /scratch/kyd500/ETHER/ether_env/bin/python
  - ${program}
  - ${args}
# Search strategy: `grid` enumerates the combinations of the value lists
# declared under `parameters`.
method: grid
# Metric associated with the sweep and the direction considered better.
metric:
  name: PerEpisode/SuccessRatio
  goal: maximize
# Hyperparameter grid. Entries with a single value are fixed; entries with
# several values are swept. Multi-valued entries below: seed (5),
# n_step, sequence_replay_burn_in_ratio, THER_use_THER,
# THER_episode_length_reward_shaping,
# THER_predictor_accuracy_safe_to_relabel_threshold,
# THER_filter_predicate_fn and THER_contrastive_training_nbr_neg_examples
# (2 each), i.e. 5 * 2^7 = 640 combinations.
#
# NOTE(review): `5e3`, `10e3`, `5.0e5` and `1e3` have no decimal point
# and/or no signed exponent; YAML 1.1 loaders such as PyYAML may load them
# as strings rather than floats. They are kept verbatim here -- confirm the
# program converts them as intended.
parameters:
  config:
    values: ['babyAI_wandb_benchmark_AgnosticFiLMedTHER_config.yaml']
  seed:
    values: [10, 20, 30, 40, 50]  # swept
  learning_rate:
    values: [6.25e-5]
  batch_size:
    values: [32]
  n_step:
    values: [1, 3]  # swept
  min_capacity:
    values: [5e3]
  replay_capacity:
    values: [10e3]
  sequence_replay_unroll_length:
    values: [20]
  sequence_replay_overlap_length:
    values: [10]
  sequence_replay_burn_in_ratio:
    values: [0.0, 0.5]  # swept
  tau:
    values: [0.0004]
  nbr_actor:
    values: [32]
  r2d2_use_value_function_rescaling:
    values: [false]
  train_observation_budget:
    values: [5.0e5]
  weights_entropy_lambda:
    values: [0.0]
  adam_weight_decay:
    values: [0.0]
  ther_adam_weight_decay:
    values: [0.0]

  # THER_* options.
  THER_use_THER:
    values: [true, false]  # swept
  THER_use_PER:
    values: [true]
  THER_episode_length_reward_shaping:
    values: [true, false]  # swept
  THER_replay_capacity:
    values: [1e3]
  THER_min_capacity:
    values: [16]
  THER_predictor_test_train_split_interval:
    values: [5]
  THER_test_replay_capacity:
    values: [256]
  THER_test_min_capacity:
    values: [64]
  THER_replay_period:
    values: [2048]
  THER_train_on_success:
    values: [false]
  THER_nbr_training_iteration_per_update:
    values: [128]
  THER_predict_PADs:
    values: [false]
  THER_predictor_accuracy_threshold:
    values: [0.95]
  THER_predictor_accuracy_safe_to_relabel_threshold:
    values: [0.2, 0.5]  # swept
  THER_filter_predicate_fn:
    values: [false, true]  # swept
  THER_relabel_terminal:
    values: [false]
  THER_filter_out_timed_out_episode:
    values: [true]
  THER_train_contrastively:
    values: [true]
  THER_contrastive_training_nbr_neg_examples:
    values: [0, 1]  # swept
  single_pick_episode:
    values: [true]
  THER_timing_out_episode_length_threshold:
    values: [40]
 
