,name,seed_everything,test_prompt_length,max_pred_length,lr,adversarial_training,max_length,batch_size,model,dim_model,num_heads,dim_feedforward,num_decoder_layers,dropout_p,layer_norm_eps,lstm_embedding_dim,lstm_hidden_dim,lstm_num_layers,lstm_dropout,linear_embedding_dim,linear_bias,linear_dim,mamba_d_model,mamba_d_state,mamba_d_conv,mamba_n_layers,min_train_loss,min_train_loss_step,max_val_accuracy,max_val_accuracy_step,min_val_loss,min_val_loss_step,min_val_kl,min_val_kl_step,rule_1_accuracy4min_val_loss,rule_2_accuracy4min_val_loss,grammatical_accuracy4min_val_loss,finished4min_val_loss,ood_rule_1_accuracy4min_val_loss,ood_rule_2_completion_accuracy4min_val_loss,ood_rule_2_accuracy4min_val_loss,ood_grammatical_accuracy4min_val_loss,ood_finished4min_val_loss,sos_rule_1_accuracy4min_val_loss,sos_rule_2_accuracy4min_val_loss,sos_grammatical_accuracy4min_val_loss,sos_finished4min_val_loss
0,deep-sweep-4,31858,8,300,0.002,False,256,128,mamba,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018887581303715706,213,0.9961393475532532,22,0.018890738487243652,28,-0.01901574805378914,28,1.0,1,1.0,1.0,0.3154762089252472,1.0,0.032258063554763794,0,1.0,1,1,1,1
1,generous-sweep-3,99677,8,300,0.002,False,256,128,mamba,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018881332129240036,355,0.9961393475532532,35,0.018881363794207573,70,-0.019025122746825218,70,1.0,1,1.0,1.0,0.23175965249538422,1.0,0.04597701132297516,0,1.0,1,1,1,1
2,autumn-sweep-3,65502,8,300,0.002,False,256,128,mamba,10,5,1024,5,0.1,0.006,10,64,5,0.4,10,True,256,32,16,8,10,0.018886307254433632,388,0.9961393475532532,40,0.018886907026171684,84,-0.019019579514861107,84,1.0,1,1.0,1.0,0.32258063554763794,1.0,0.03252032399177551,0,0.9960159659385681,1,1,1,1
