model,dataset,accuracy,config,method,version,LoRA.r,batch_size,steps,lr,weight_decay
flan-t5-base,glue-cola,0.6951102588686481,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.7526366251198466,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.62320230105465,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.6883988494726749,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.6893576222435283,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6903163950143816,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-cola,0.5551294343240653,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.6893576222435283,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.6893576222435283,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6903163950143816,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-cola,0.486097794822627,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.6883988494726749,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.6883988494726749,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6912751677852349,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-cola,0.6912751677852349,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-cola,0.6903163950143816,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-cola,0.6922339405560882,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-cola,0.6912751677852349,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-cola,0.6912751677852349,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-cola', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'cola'}, 'preprocessor': {'_target_': 'peta.tasks.CoLA_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.8141619969434539,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.8241467142129394,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.0001018848700967,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.2903718797758532,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.38634742740703,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.4588894549159449,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.0001018848700967,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.2565461029037188,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.393173713703515,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.4821192052980132,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.0001018848700967,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.2886398369842078,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.4223127865511971,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.5019867549668874,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.6665308201732043,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.7130922058074376,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.7418237391747325,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.7613856342333164,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.7395822720326032,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.7600611309220581,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mnli,0.6686704024452369,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mnli,0.7175751400916964,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mnli,0.7451859398879267,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mnli,0.7641365257259297,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mnli', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mnli'}, 'preprocessor': {'_target_': 'peta.tasks.MNLI_Preprocessor'}, 'map_kwargs': {'remove_columns': ['idx', 'hypothesis', 'premise', 'label'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.8235294117647058,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.8578431372549019,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.6838235294117647,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.7230392156862745,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.7818627450980392,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.7965686274509803,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.803921568627451,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.7990196078431373,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.7990196078431373,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-mrpc,0.7009803921568627,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-mrpc,0.7622549019607843,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-mrpc,0.7916666666666666,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-mrpc,0.7990196078431373,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-mrpc', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'mrpc'}, 'preprocessor': {'_target_': 'peta.tasks.MRPC_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.8293346524857779,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.8360870640613406,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.3482809794706901,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.8033638387336136,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.6990353697749196,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.3341825377195152,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.7765025970813753,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.7178332921098194,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.4714073707642839,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.7382389314865199,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.6998515953499876,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.7340341330695028,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.7907741775908979,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.8123423200593619,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.8246351719020529,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.8124412564927035,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.8239178827603265,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-qqp,0.7353203067029433,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-qqp,0.7924066287410338,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-qqp,0.8138263665594855,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-qqp,0.8259460796438288,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-qqp', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'qqp'}, 'preprocessor': {'_target_': 'peta.tasks.QQP_Preprocessor'}, 'map_kwargs': {'remove_columns': ['question1', 'question2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.851985559566787,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.851985559566787,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.4187725631768953,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.631768953068592,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.7545126353790613,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.4368231046931408,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.6028880866425993,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.7364620938628159,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.0,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.4729241877256318,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.6137184115523465,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.7436823104693141,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.8267148014440433,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.8158844765342961,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.8122743682310469,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.8014440433212996,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.8050541516245487,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.8050541516245487,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-rte,0.8050541516245487,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-rte,0.8014440433212996,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-rte,0.8050541516245487,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-rte,0.8014440433212996,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-rte', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'rte'}, 'preprocessor': {'_target_': 'peta.tasks.RTE_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.93348623853211,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.93348623853211,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.8807339449541285,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.9105504587155964,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.9059633027522936,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9094036697247706,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.8807339449541285,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.9128440366972476,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.9105504587155964,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9151376146788992,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.8761467889908257,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.9174311926605504,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.9105504587155964,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9151376146788992,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.9197247706422018,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.9220183486238532,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.9208715596330276,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9220183486238532,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.9197247706422018,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9220183486238532,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-sst2,0.9197247706422018,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-sst2,0.9231651376146788,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [0, 1, 2, 3], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-sst2,0.9231651376146788,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-sst2,0.9231651376146788,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-sst2', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'sst2'}, 'preprocessor': {'_target_': 'peta.tasks.SST2_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.8921760083468526,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,0,,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.90458476745656,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': None}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",fullfinetuned,1,,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.0105081853015102,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.3811173419007331,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.5552945218136972,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.5938880505969028,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.0057936639780018,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,8,32.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.5506238957964236,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,9,32.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.6056638710130513,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,10,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.6552552582613683,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,11,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.004789157333591,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.6166271980388973,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.6674061423386932,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.6440058478605633,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': True}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",l_lora,3,8.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.7786461174516925,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,4,16.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.8108111417534208,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,5,16.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.8510066651445863,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,6,16.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.8749174712564725,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,7,16.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.8568834947668987,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,8,32.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.8757921113512951,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 32, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,9,32.0,16.0,2000.0,4e-05,0.0
flan-t5-base,glue-stsb,0.7942162752380959,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 1e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,0,8.0,16.0,2000.0,1e-05,0.0
flan-t5-base,glue-stsb,0.8125534789951494,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 2e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,1,8.0,16.0,2000.0,2e-05,0.0
flan-t5-base,glue-stsb,0.8547383561063538,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 3e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,2,8.0,16.0,2000.0,3e-05,0.0
flan-t5-base,glue-stsb,0.876562823477553,"{'model': {'model': {'_target_': 'transformers.AutoModelForSeq2SeqLM.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'tokenizer': {'_target_': 'transformers.AutoTokenizer.from_pretrained', 'pretrained_model_name_or_path': '${..model_name_or_path}'}, 'model_name_or_path': 'google/flan-t5-base', 'name': 'flan-t5-base', 'tokenizer_kwargs': {'padding': 'max_length', 'truncation': True, 'return_tensors': 'pt'}, 'linearize': False}, 'peft': {'peft_config': {'_target_': 'peft.LoraConfig', 'target_modules': ['q', 'v'], 'inference_mode': False, 'r': 8, 'lora_alpha': 32, 'lora_dropout': 0.1}, 'seed': 42}, 'dataset': {'name': 'glue-stsb', 'datasets': {'_target_': 'datasets.load_dataset', 'path': 'glue', 'name': 'stsb'}, 'preprocessor': {'_target_': 'peta.tasks.STSB_Preprocessor'}, 'map_kwargs': {'remove_columns': ['sentence1', 'sentence2', 'label', 'idx'], 'batched': True, 'num_proc': 1, 'desc': 'Running tokenizer on dataset'}}, 'optim': {'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 4e-05, 'weight_decay': 0}}, 'seed': 42, 'batch_size': 16, 'num_workers': 8, 'trainer': {'accelerator': 'gpu', 'devices': [4, 5, 6, 7], 'max_epochs': None, 'max_steps': 2000, 'accumulate_grad_batches': 1, 'profiler': 'simple', 'enable_checkpointing': False, 'fast_dev_run': False}}",lora,3,8.0,16.0,2000.0,4e-05,0.0
