{
    "model_name_or_path": "google/t5-v1_1-base",
    "tokenizer_name": "google/t5-v1_1-base",
    "learning_rate": 3e-4,
    "lr_scheduler": "constant_w_warmup",
    "output_dir": "outputs/task_routing_same_rte_mnli_onlyencoder_t5_v1_1_base_p3_adapter/",
    "max_source_length": 128,
    "max_target_length": 128,
    "val_max_target_length": 128,
    "test_max_target_length": 128,
    "num_train_epochs": 100,
    "warmup_steps": 2000,
    "eval_steps": 5000,
    "overwrite_output_dir": true,
    "label_smoothing": 0.1,
    "per_device_train_batch_size": 128,
    "per_device_eval_batch_size": 128,
    "gradient_accumulation_steps": 1,
    "save_steps": 5000,
    "logging_first_step": true,
    "logging_steps": 200,
    "save_total_limit": 1,
    "temperature": 1,
    "do_train": true,
    "do_test": true,
    "do_eval": true,
    "check_mode": false,
    "predict_with_generate": true,
    "task_embedding_dim": 512,
    "split_validation_test": true,
    "non_linearity": "gelu_new",
    "load_best_model_at_end": true,
    "evaluation_strategy": "steps",
    "metric_for_best_model": "eval_average_metrics",
    "greater_is_better": true,
    "max_steps": 600000,
    "tasks": ["rte", "sst2", "mrpc", "stsb", "qqp", "mnli", "qnli", "cola"],
    "eval_tasks": ["rte", "sst2", "mrpc", "stsb", "qqp", "mnli", "qnli", "cola"],
    "train_adapters": true,
    "reduction_factor": 32,
    "unfreeze_lm_head": false,
    "unfreeze_layer_norms": true,
    "adapter_config_name": "adapter",
    "num_adapters": 8,
    "routing_estimator": "task_routing",
    "model_dim": 768,
    "load_loss_accm": null,
    "num_routers": 1,
    "data_seed": 42,
    "same_rte_mnli": true,
    "only_in_encoder": true,
    "analyze_model": false
}