"epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - _step","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - _step__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - _step__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1024_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1024_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1024_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_128_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_128_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_128_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_16_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_16_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_16_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_1_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2048_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2048_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2048_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_256_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_256_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_256_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_2_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_32_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_32_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_32_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4096_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4096_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4096_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_4_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_512_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_512_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_512_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_64_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_64_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_64_epoch__MAX","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_8_epoch","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_8_epoch__MIN","tag_itr_attention_max_prob_sum_head_12_dim_384_bs_64_weight_decay_0.01_lr_0.0005_use_compile_True_train_sudoku_test_sudoku-hard_num_layers_1_num_rep_attn_4_use_mpc_False - test_board_acc_32_8_epoch__MAX"
"0","338.84615384615387","279","621","0.9245555400848389","0.9245555400848389","0.9245555400848389","0.8694999814033508","0.8694999814033508","0.8694999814033508","0.7815555930137634","0.7815555930137634","0.7815555930137634","0.570722222328186","0.570722222328186","0.570722222328186","0.93622225522995","0.93622225522995","0.93622225522995","0.8913888931274414","0.8913888931274414","0.8913888931274414","0.6337222456932068","0.6337222456932068","0.6337222456932068","0.8141666650772095","0.8141666650772095","0.8141666650772095","0.9479444622993469","0.9479444622993469","0.9479444622993469","0.6915555596351624","0.6915555596351624","0.6915555596351624","0.9093888998031616","0.9093888998031616","0.9093888998031616","0.8456666469573975","0.8456666469573975","0.8456666469573975","0.7398889064788818","0.7398889064788818","0.7398889064788818"