# Default configuration mapping. The section labels below are inferred from
# the key names only -- the code that consumes these values is not part of
# this file, so confirm exact semantics against the consumer.
default_mier_config = {
    # -- run / environment setup --
    "seed": 0,
    "env_params": {},
    "max_path_length": 200,
    "device": "cpu",
    "critic_nn_arch": "300-300-300",
    "actor_nn_arch": "300-300-300",
    "log_dir": None,
    "log_annotation": "default",
    "load_model_itr": None,

    # -- data / buffer load locations (all unset by default) --
    "load_path_prefix": None,
    "train_data_load_path": None,
    "val_data_load_path": None,
    "test_data_load_path": None,
    "cross_task_data_load_path": None,
    "replay_buffer_load_path": None,
    "pre_adapt_replay_buffer_load_path": None,
    "continue_training_from_loaded_model": False,

    # -- model checkpoint load locations (all unset by default) --
    "sac_load_path": None,
    "model_load_path": None,
    "state_model_load_path": None,
    "reward_model_load_path": None,

    # -- training schedule --
    "initial_exploration_repeats": 1,
    "num_train_epochs": 500,
    "num_tasks_sample": 5,
    "replay_buffer_max_sample_size": int(1e6),
    "context_dim": 5,
    "save_interval": 50,
    "num_training_steps_per_epoch": 2000,

    # -- extrapolation / relabelling settings --
    "batch_size": 256,
    "num_extrapol_epochs": 10,
    "train_val_ratio": 0.8,
    "model_real_ratio": 0.05,
    "relabel_data_for_extrapolation": False,
    "adapt_model_for_extrapolation": False,
    "off_policy_relabelling": False,
    "relabelling_interval": 250,
    "num_sac_steps_per_epoch": 1000,
    "rollout_batch_size": int(100e3),
    "num_cross_tasks_for_relabelling": 20,
    "num_sac_repeat_steps_for_extrapol": 2,
    "log_metrics_for_every_adapt_step": True,
    "discard_term_states_while_relabelling": False,

    # -- fast-adaptation step counts --
    "num_fast_adapt_steps_for_context": 10,
    "num_extra_fast_adapt_steps_for_model": 100,

    # -- model / task structure switches --
    "joint_state_reward_model": False,
    "model_rollout_length": 1,
    "is_model_context_conditioned": False,
    "multi_task": True,
    "cross_task_relabelling_for_training": False,
    "cross_task_relabelling_for_testing": True,

    "meta_learn_state_dynamics": True,
    "meta_learn_reward": True,

    # Nested settings for the learned model (name given here as 'BNN').
    "model_hyperparams": {
        "name": "BNN",
        "meta_train": True,
        "is_deterministic": False,
        "num_nets": 1,
        "num_elites": 1,
        "reg_weight": 0,
        "fixed_preupdate_context": True,
        "state_pred": True,
        "rew_pred": True,
        "reward_prediction_weight": 1,
        "hidden_dim": 200,
        "meta_batch_size": 10,
        "fast_adapt_steps": 2,
        "fast_adapt_lr": 0.01,
        "clip_val_inner_grad": 10,
        "clip_val_outer_grad": 10,
    },

    # Nested settings keyed with SAC-related names (presumably for a
    # soft actor-critic learner -- verify against the consumer).
    "sac_hyperparams": {
        "log_alpha_init": -1,
        "discount_factor": 0.99,
        "critic_learning_rate": 3e-4,
        "actor_learning_rate": 3e-4,
        "target_update_interval": 1,
        "target_update_rate": 0.005,
        "sac_reward_scale": 1,
        "soft_temperature": 1.0,
        "target_entropy": -1.0,
        "sac_optimizer": "adam",
        "automatic_entropy_tuning": True,
    },
}

