,name,seed_everything,test_prompt_length,max_pred_length,lr,adversarial_training,max_length,batch_size,model,dim_model,num_heads,dim_feedforward,num_decoder_layers,dropout_p,layer_norm_eps,lstm_embedding_dim,lstm_hidden_dim,lstm_num_layers,lstm_dropout,linear_embedding_dim,linear_bias,linear_dim,mamba_d_model,mamba_d_state,mamba_d_conv,mamba_n_layers,min_train_loss,min_train_loss_step,max_val_accuracy,max_val_accuracy_step,min_val_loss,min_val_loss_step,min_val_kl,min_val_kl_step,rule_1_accuracy4min_val_loss,rule_2_accuracy4min_val_loss,grammatical_accuracy4min_val_loss,finished4min_val_loss,ood_rule_1_accuracy4min_val_loss,ood_rule_2_completion_accuracy4min_val_loss,ood_rule_2_accuracy4min_val_loss,ood_grammatical_accuracy4min_val_loss,ood_finished4min_val_loss,sos_rule_1_accuracy4min_val_loss,sos_rule_2_accuracy4min_val_loss,sos_grammatical_accuracy4min_val_loss,sos_finished4min_val_loss
0,spring-sweep-6,97597,5,300,0.002,False,256,128,linear,10,5,1024,7,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,1.1856142009492032e-05,129,0.5134813189506531,0,2.7808756828308105,0,-0.037894636392593384,25,0,0,0,0,0,0,0,0,0,0,0,0,0
1,wobbly-sweep-5,19588,5,300,0.002,False,256,128,linear,10,5,1024,7,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,1.1850984265038278e-05,129,0.5173265933990479,5,2.971360445022583,0,-0.03789464011788368,25,0,0,0,0,0,0,0,0,0,0,0,0,0
2,still-sweep-4,68687,5,300,0.002,False,256,128,linear,10,5,1024,7,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,1.2282322131795809e-05,129,0.5206225514411926,20,2.934164524078369,0,-0.03789421170949936,25,0,0,0,0,0,0,0,0,0,0,0,0,0
3,exalted-sweep-3,34960,5,300,0.002,False,256,128,linear,10,5,1024,7,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,1.1764450391638093e-05,129,0.5097275972366333,2,2.562844753265381,0,-0.03789472579956055,25,0,0,0,0,0,0,0,0,0,0,0,0,0
4,celestial-sweep-2,89570,5,300,0.002,False,256,128,linear,10,5,1024,7,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,1.1214283404115122e-05,129,0.5095444917678833,0,2.034409761428833,0,-0.03789527714252472,25,0,0,0,0,0,0,0,0,0,0,0,0,0
