{
"model_name_or_path": "google/t5-v1_1-base",
"tokenizer_name": "google/t5-v1_1-base",
"learning_rate": 3e-4,
"lr_scheduler": "constant_w_warmup",
"output_dir": "outputs/one_adapter_t5_v1_1_base_p3_adapter/",
"max_source_length": 128,
"max_target_length": 128 ,
"val_max_target_length":128,
"test_max_target_length":128,
"num_train_epochs": 100,
"warmup_steps": 2000,
"eval_steps": 5000,
"overwrite_output_dir": true,
"label_smoothing": 0.1,
"per_device_train_batch_size":128,
"per_device_eval_batch_size":128,
"gradient_accumulation_steps": 1,
"save_steps": 5000,
"logging_first_step":true,
"logging_steps": 200,
"save_total_limit": 1,
"temperature": 1,
"do_train": true,
"do_test": true,
"do_eval": true,
"check_mode": false,
"predict_with_generate": true,
"task_embedding_dim": 512,
"split_validation_test": true,
"non_linearity": "gelu_new",
"load_best_model_at_end": true,
"evaluation_strategy": "steps",
"metric_for_best_model": "eval_average_metrics",
"greater_is_better": true,
"max_steps": 600000,
"tasks": ["rte", "sst2", "mrpc", "stsb", "qqp", "mnli", "qnli", "cola"],
"eval_tasks": ["rte", "sst2", "mrpc", "stsb", "qqp", "mnli", "qnli", "cola"],
"train_adapters": true,
"reduction_factor": 32, 
"unfreeze_lm_head": false,
"unfreeze_layer_norms": true,
"adapter_config_name": "adapter",
"num_adapters": 1,
"routing_estimator": null,
"load_loss_weight": 0.0,
"model_dim": 768,
"load_loss_accm": null,
"num_routers": 1,
"analyze_model":false,
"data_seed": 42
}




