uid,strategy,metric,score,metric_logical
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,LLMJudge-qwen3_32b-seed42,0.8770833333333333,LLMJudge
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,LLMJudge-qwen3_32b-seed42,0.875,LLMJudge
ecf022597d3cab9575117ef24fe97406,same_obvious,LLMJudge-qwen3_32b-seed42,0.9375,LLMJudge
3206cf67635a5ffccd10a1d3189db638,same_obvious,LLMJudge-qwen3_32b-seed42,0.7848958333333333,LLMJudge
a9b7430ccffa4f2f9c57dc2169c8a665,same_obvious,LLMJudge-qwen3_32b-seed42,0.90625,LLMJudge
ecf022597d3cab9575117ef24fe97406,same_obvious,LLMJudge-qwen3_32b-seed42,0.8125,LLMJudge
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LLMJudge-qwen3_32b-seed42,0.9204861111111111,LLMJudge
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LLMJudge-qwen3_32b-seed42,0.9444444444444444,LLMJudge
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LLMJudge-qwen3_32b-seed42,0.975,LLMJudge
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LLMJudge-qwen3_32b-seed42,0.715625,LLMJudge
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LLMJudge-qwen3_32b-seed42,0.8368055555555556,LLMJudge
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LLMJudge-qwen3_32b-seed42,0.975,LLMJudge
65f0aa2af23948f27b05e2f8664780cd,same_obvious,LLMJudge-qwen3_32b-seed42,0.7994791666666666,LLMJudge
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LLMJudge-qwen3_32b-seed42,0.715625,LLMJudge
e3c55133670bf57b312552a88b185d48,same_obvious,LLMJudge-qwen3_32b-seed42,0.9791666666666666,LLMJudge
3e4716df00472cd23c669d4ac629ac57,same_obvious,LLMJudge-qwen3_32b-seed42,0.7847222222222222,LLMJudge
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,LLMJudge-qwen3_32b-seed42,0.96875,LLMJudge
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LLMJudge-qwen3_32b-seed42,0.7145833333333333,LLMJudge
ecf022597d3cab9575117ef24fe97406,same_obvious,LLMJudge-qwen3_32b-seed42,0.90625,LLMJudge
8926974c96533a1af0a2a24bab42e8c9,same_obvious,LLMJudge-qwen3_32b-seed42,0.8161458333333333,LLMJudge
65f0aa2af23948f27b05e2f8664780cd,same_obvious,LLMJudge-qwen3_32b-seed42,0.7994791666666666,LLMJudge
ff3ca20062474bb3839f9ad89802fca3,same_obvious,LLMJudge-qwen3_32b-seed42,0.9375,LLMJudge
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,LLMJudge-qwen3_32b-seed42,0.96875,LLMJudge
3e4716df00472cd23c669d4ac629ac57,same_obvious,LLMJudge-qwen3_32b-seed42,0.9652777777777778,LLMJudge
3e4716df00472cd23c669d4ac629ac57,same_obvious,LLMJudge-qwen3_32b-seed42,0.9652777777777778,LLMJudge
8926974c96533a1af0a2a24bab42e8c9,same_obvious,LLMJudge-qwen3_32b-seed42,0.8786458333333333,LLMJudge
a334fc23bfa3455dc3342a79729df868,same_obvious,LLMJudge-qwen3_32b-seed42,0.9375,LLMJudge
62cd1ffdd6296220fde591628ea23af8,same_obvious,LLMJudge-qwen3_32b-seed42,0.7802083333333334,LLMJudge
3206cf67635a5ffccd10a1d3189db638,same_obvious,LLMJudge-qwen3_32b-seed42,0.9098958333333333,LLMJudge
ecf022597d3cab9575117ef24fe97406,same_obvious,LLMJudge-qwen3_32b-seed42,0.875,LLMJudge
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,DNAEval-qwen3_32b-seed42,0.7012987012987013,DNAEval
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,DNAEval-qwen3_32b-seed42,0.8376623376623378,DNAEval
ecf022597d3cab9575117ef24fe97406,same_obvious,DNAEval-qwen3_32b-seed42,0.7775974025974026,DNAEval
3206cf67635a5ffccd10a1d3189db638,same_obvious,DNAEval-qwen3_32b-seed42,0.8198051948051946,DNAEval
a9b7430ccffa4f2f9c57dc2169c8a665,same_obvious,DNAEval-qwen3_32b-seed42,0.3474025974025974,DNAEval
ecf022597d3cab9575117ef24fe97406,same_obvious,DNAEval-qwen3_32b-seed42,0.9853896103896103,DNAEval
3669aa9b198a80a2be12cfd072637b2a,same_obvious,DNAEval-qwen3_32b-seed42,0.8560606060606062,DNAEval
3669aa9b198a80a2be12cfd072637b2a,same_obvious,DNAEval-qwen3_32b-seed42,0.8820346320346321,DNAEval
87ccd97c236471c1f39ade43a7bffda2,same_obvious,DNAEval-qwen3_32b-seed42,0.9720779220779221,DNAEval
87ccd97c236471c1f39ade43a7bffda2,same_obvious,DNAEval-qwen3_32b-seed42,0.9818181818181819,DNAEval
3669aa9b198a80a2be12cfd072637b2a,same_obvious,DNAEval-qwen3_32b-seed42,0.6093073593073594,DNAEval
87ccd97c236471c1f39ade43a7bffda2,same_obvious,DNAEval-qwen3_32b-seed42,0.6863636363636364,DNAEval
65f0aa2af23948f27b05e2f8664780cd,same_obvious,DNAEval-qwen3_32b-seed42,0.7142857142857144,DNAEval
87ccd97c236471c1f39ade43a7bffda2,same_obvious,DNAEval-qwen3_32b-seed42,0.9857142857142857,DNAEval
e3c55133670bf57b312552a88b185d48,same_obvious,DNAEval-qwen3_32b-seed42,0.9610389610389611,DNAEval
3e4716df00472cd23c669d4ac629ac57,same_obvious,DNAEval-qwen3_32b-seed42,0.9567099567099567,DNAEval
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,DNAEval-qwen3_32b-seed42,0.9058441558441558,DNAEval
87ccd97c236471c1f39ade43a7bffda2,same_obvious,DNAEval-qwen3_32b-seed42,0.7415584415584416,DNAEval
ecf022597d3cab9575117ef24fe97406,same_obvious,DNAEval-qwen3_32b-seed42,0.8522727272727273,DNAEval
8926974c96533a1af0a2a24bab42e8c9,same_obvious,DNAEval-qwen3_32b-seed42,0.5909090909090908,DNAEval
65f0aa2af23948f27b05e2f8664780cd,same_obvious,DNAEval-qwen3_32b-seed42,0.5324675324675325,DNAEval
ff3ca20062474bb3839f9ad89802fca3,same_obvious,DNAEval-qwen3_32b-seed42,0.9870129870129869,DNAEval
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,DNAEval-qwen3_32b-seed42,0.8668831168831168,DNAEval
3e4716df00472cd23c669d4ac629ac57,same_obvious,DNAEval-qwen3_32b-seed42,0.5173160173160174,DNAEval
3e4716df00472cd23c669d4ac629ac57,same_obvious,DNAEval-qwen3_32b-seed42,0.7835497835497833,DNAEval
8926974c96533a1af0a2a24bab42e8c9,same_obvious,DNAEval-qwen3_32b-seed42,0.7467532467532468,DNAEval
a334fc23bfa3455dc3342a79729df868,same_obvious,DNAEval-qwen3_32b-seed42,0.9740259740259739,DNAEval
62cd1ffdd6296220fde591628ea23af8,same_obvious,DNAEval-qwen3_32b-seed42,0.49350649350649334,DNAEval
3206cf67635a5ffccd10a1d3189db638,same_obvious,DNAEval-qwen3_32b-seed42,0.7646103896103899,DNAEval
ecf022597d3cab9575117ef24fe97406,same_obvious,DNAEval-qwen3_32b-seed42,0.978896103896104,DNAEval
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,Autometrics_Regression_time_sec,0.2506939808296804,Autometrics
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,Autometrics_Regression_time_sec,0.43658627546667317,Autometrics
ecf022597d3cab9575117ef24fe97406,same_obvious,Autometrics_Regression_time_sec,0.3413831388580355,Autometrics
3206cf67635a5ffccd10a1d3189db638,same_obvious,Autometrics_Regression_time_sec,0.3264255501932185,Autometrics
a9b7430ccffa4f2f9c57dc2169c8a665,same_obvious,Autometrics_Regression_time_sec,0.4294879776112229,Autometrics
ecf022597d3cab9575117ef24fe97406,same_obvious,Autometrics_Regression_time_sec,0.367634625099428,Autometrics
3669aa9b198a80a2be12cfd072637b2a,same_obvious,Autometrics_Regression_time_sec,0.16390285662625315,Autometrics
3669aa9b198a80a2be12cfd072637b2a,same_obvious,Autometrics_Regression_time_sec,0.15890742893916165,Autometrics
87ccd97c236471c1f39ade43a7bffda2,same_obvious,Autometrics_Regression_time_sec,0.7529346957587425,Autometrics
87ccd97c236471c1f39ade43a7bffda2,same_obvious,Autometrics_Regression_time_sec,0.9924658554707809,Autometrics
3669aa9b198a80a2be12cfd072637b2a,same_obvious,Autometrics_Regression_time_sec,0.236485722718775,Autometrics
87ccd97c236471c1f39ade43a7bffda2,same_obvious,Autometrics_Regression_time_sec,0.3320567232835736,Autometrics
65f0aa2af23948f27b05e2f8664780cd,same_obvious,Autometrics_Regression_time_sec,0.7463631369481123,Autometrics
87ccd97c236471c1f39ade43a7bffda2,same_obvious,Autometrics_Regression_time_sec,0.36067361813525733,Autometrics
e3c55133670bf57b312552a88b185d48,same_obvious,Autometrics_Regression_time_sec,0.1408570352172559,Autometrics
3e4716df00472cd23c669d4ac629ac57,same_obvious,Autometrics_Regression_time_sec,0.9791422060899402,Autometrics
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,Autometrics_Regression_time_sec,0.7470633206570789,Autometrics
87ccd97c236471c1f39ade43a7bffda2,same_obvious,Autometrics_Regression_time_sec,0.6975984698225378,Autometrics
ecf022597d3cab9575117ef24fe97406,same_obvious,Autometrics_Regression_time_sec,0.32120829087983116,Autometrics
8926974c96533a1af0a2a24bab42e8c9,same_obvious,Autometrics_Regression_time_sec,0.2567817935633967,Autometrics
65f0aa2af23948f27b05e2f8664780cd,same_obvious,Autometrics_Regression_time_sec,0.7557672221444554,Autometrics
ff3ca20062474bb3839f9ad89802fca3,same_obvious,Autometrics_Regression_time_sec,0.6290426767307999,Autometrics
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,Autometrics_Regression_time_sec,0.9979626108304943,Autometrics
3e4716df00472cd23c669d4ac629ac57,same_obvious,Autometrics_Regression_time_sec,0.5170553482224151,Autometrics
3e4716df00472cd23c669d4ac629ac57,same_obvious,Autometrics_Regression_time_sec,0.3153213563333376,Autometrics
8926974c96533a1af0a2a24bab42e8c9,same_obvious,Autometrics_Regression_time_sec,0.39013969772725776,Autometrics
a334fc23bfa3455dc3342a79729df868,same_obvious,Autometrics_Regression_time_sec,0.3457665610138736,Autometrics
62cd1ffdd6296220fde591628ea23af8,same_obvious,Autometrics_Regression_time_sec,0.9532839509004304,Autometrics
3206cf67635a5ffccd10a1d3189db638,same_obvious,Autometrics_Regression_time_sec,0.41674371087634876,Autometrics
ecf022597d3cab9575117ef24fe97406,same_obvious,Autometrics_Regression_time_sec,0.9726011294240631,Autometrics
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,LevenshteinDistance_min,0.4082308420056765,BEST_METRIC
2a0acd9bd535c8d1555642ff47c732d4,same_obvious,LevenshteinDistance_min,0.6381267738883633,BEST_METRIC
ecf022597d3cab9575117ef24fe97406,same_obvious,LevenshteinDistance_min,0.5028382213812677,BEST_METRIC
3206cf67635a5ffccd10a1d3189db638,same_obvious,LevenshteinDistance_min,0.6887417218543046,BEST_METRIC
a9b7430ccffa4f2f9c57dc2169c8a665,same_obvious,LevenshteinDistance_min,0.7105014191106906,BEST_METRIC
ecf022597d3cab9575117ef24fe97406,same_obvious,LevenshteinDistance_min,0.6400189214758751,BEST_METRIC
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LevenshteinDistance_min,0.5279091769157994,BEST_METRIC
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LevenshteinDistance_min,0.44843897824030277,BEST_METRIC
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LevenshteinDistance_min,0.7748344370860927,BEST_METRIC
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LevenshteinDistance_min,0.9186376537369915,BEST_METRIC
3669aa9b198a80a2be12cfd072637b2a,same_obvious,LevenshteinDistance_min,0.45979186376537373,BEST_METRIC
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LevenshteinDistance_min,0.6754966887417219,BEST_METRIC
65f0aa2af23948f27b05e2f8664780cd,same_obvious,LevenshteinDistance_min,0.9659413434247871,BEST_METRIC
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LevenshteinDistance_min,0.6158940397350994,BEST_METRIC
e3c55133670bf57b312552a88b185d48,same_obvious,LevenshteinDistance_min,0.6291390728476821,BEST_METRIC
3e4716df00472cd23c669d4ac629ac57,same_obvious,LevenshteinDistance_min,0.9653106275622831,BEST_METRIC
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,LevenshteinDistance_min,0.8675496688741722,BEST_METRIC
87ccd97c236471c1f39ade43a7bffda2,same_obvious,LevenshteinDistance_min,0.8921475875118259,BEST_METRIC
ecf022597d3cab9575117ef24fe97406,same_obvious,LevenshteinDistance_min,0.5529801324503312,BEST_METRIC
8926974c96533a1af0a2a24bab42e8c9,same_obvious,LevenshteinDistance_min,0.46121097445600756,BEST_METRIC
65f0aa2af23948f27b05e2f8664780cd,same_obvious,LevenshteinDistance_min,0.923368022705771,BEST_METRIC
ff3ca20062474bb3839f9ad89802fca3,same_obvious,LevenshteinDistance_min,0.9574266792809839,BEST_METRIC
7deb88a6f0a6dc7a63e6b95ae6b72d2c,same_obvious,LevenshteinDistance_min,0.9583727530747399,BEST_METRIC
3e4716df00472cd23c669d4ac629ac57,same_obvious,LevenshteinDistance_min,0.8085777357300536,BEST_METRIC
3e4716df00472cd23c669d4ac629ac57,same_obvious,LevenshteinDistance_min,0.6449069694102807,BEST_METRIC
8926974c96533a1af0a2a24bab42e8c9,same_obvious,LevenshteinDistance_min,0.5018921475875118,BEST_METRIC
a334fc23bfa3455dc3342a79729df868,same_obvious,LevenshteinDistance_min,0.4531693472090823,BEST_METRIC
62cd1ffdd6296220fde591628ea23af8,same_obvious,LevenshteinDistance_min,0.9820245979186377,BEST_METRIC
3206cf67635a5ffccd10a1d3189db638,same_obvious,LevenshteinDistance_min,0.7218543046357616,BEST_METRIC
ecf022597d3cab9575117ef24fe97406,same_obvious,LevenshteinDistance_min,0.9976348155156102,BEST_METRIC
