,name,seed_everything,test_prompt_length,max_pred_length,lr,adversarial_training,max_length,batch_size,model,dim_model,num_heads,dim_feedforward,num_decoder_layers,dropout_p,layer_norm_eps,lstm_embedding_dim,lstm_hidden_dim,lstm_num_layers,lstm_dropout,linear_embedding_dim,linear_bias,linear_dim,mamba_d_model,mamba_d_state,mamba_d_conv,mamba_n_layers,min_train_loss,min_train_loss_step,max_val_accuracy,max_val_accuracy_step,min_val_loss,min_val_loss_step,min_val_kl,min_val_kl_step,rule_1_accuracy4min_val_loss,rule_2_accuracy4min_val_loss,grammatical_accuracy4min_val_loss,finished4min_val_loss,ood_rule_1_accuracy4min_val_loss,ood_rule_2_completion_accuracy4min_val_loss,ood_rule_2_accuracy4min_val_loss,ood_grammatical_accuracy4min_val_loss,ood_finished4min_val_loss,sos_rule_1_accuracy4min_val_loss,sos_rule_2_accuracy4min_val_loss,sos_grammatical_accuracy4min_val_loss,sos_finished4min_val_loss
0,efficient-sweep-3,56967,8,300,0.002,False,256,128,lstm,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018814217299222946,270,0.9961393475532532,50,0.018886016681790352,198,-0.01902046985924244,198,1.0,1,1.0,1.0,0.8333333134651184,1.0,0.07692307978868484,0,0.4223107695579529,1,1,1,1
1,winter-sweep-2,77863,8,300,0.002,False,256,128,lstm,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018861880525946617,401,0.9961393475532532,49,0.018897565081715584,58,-0.019008921459317207,58,1.0,1,1.0,1.0,0.35211268067359924,1.0,0.028169013559818268,0,1.0,1,1,1,1
2,distinctive-sweep-1,50348,8,300,0.002,False,256,128,lstm,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018817245960235596,259,0.9961393475532532,35,0.018886137753725052,132,-0.01902034878730774,132,1.0,1,1.0,1.0,0.19230769574642181,1.0,0.08695652335882187,0,0.15537849068641663,1,1,1,1
