[{"key": "35951548", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.38130797387420445, "res": {"No": 0.6186808060243757, "Yes": 0.38130797387420445}, "ground_truth": 0}, {"key": "35951548", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964606155400975, "res": {"Yes": 0.9964606155400975, "No": 0.0035379458097342053}, "ground_truth": 0}, {"key": "35951548", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956296528829308, "res": {"Yes": 0.9956296528829308, "No": 0.004369669071882943}, "ground_truth": 1}, {"key": "35951548", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890547222503309, "res": {"Yes": 0.9890547222503309, "No": 0.010944239874336448}, "ground_truth": 0}, {"key": "35951548", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.40008550188657577, "res": {"No": 0.5999033620428424, "Yes": 0.40008550188657577}, "ground_truth": 0}, {"key": "36266422", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.45058711797353035, "res": {"No": 0.5494111283860414, "Yes": 0.45058711797353035}, "ground_truth": 0}, {"key": "36266422", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9858124603057126, "res": {"Yes": 0.9858124603057126, "No": 0.014183229974524352}, "ground_truth": 0}, {"key": "36266422", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.988745908462361, "res": {"Yes": 0.988745908462361, "No": 0.011251748952165793}, "ground_truth": 1}, {"key": "36266422", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9570943404741354, "res": {"Yes": 0.9570943404741354, "No": 0.042901804113700294}, "ground_truth": 0}, {"key": "36266422", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9744674269410373, "res": {"Yes": 0.9744674269410373, "No": 0.02552952636490837}, "ground_truth": 0}, {"key": "38826984", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03277726317000283, "res": {"No": 0.9672220550050549, "Yes": 0.03277726317000283}, "ground_truth": 0}, {"key": "38826984", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986918081299824, "res": {"Yes": 0.9986918081299824, "No": 0.0013077803083906077}, "ground_truth": 0}, {"key": "38826984", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873534571911641, "res": {"Yes": 0.9873534571911641, "No": 0.012646168437567653}, "ground_truth": 1}, {"key": "38826984", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912554856254687, "res": {"Yes": 0.9912554856254687, "No": 0.008744022105803414}, "ground_truth": 0}, {"key": "38826984", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933019459370254, "res": {"Yes": 0.9933019459370254, "No": 0.0066977207301007835}, "ground_truth": 0}, {"key": "34540833", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9620225132918367, "res": {"Yes": 0.9620225132918367, "No": 0.037976330306549434}, "ground_truth": 0}, {"key": "34540833", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9836982612758076, "res": {"Yes": 0.9836982612758076, "No": 0.016300547219246897}, "ground_truth": 0}, {"key": "34540833", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9613330192133026, "res": {"Yes": 0.9613330192133026, "No": 0.03866424530218134}, "ground_truth": 1}, {"key": "34540833", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8896866917279749, "res": {"Yes": 0.8896866917279749, "No": 0.11031261382651059}, "ground_truth": 0}, {"key": "34540833", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9684634263874161, "res": {"Yes": 0.9684634263874161, "No": 0.03153291052438207}, "ground_truth": 0}, {"key": "20836172", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.052565340619903624, "res": {"No": 0.9474235284903387, "Yes": 0.052565340619903624}, "ground_truth": 0}, {"key": "20836172", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9853998781772466, "res": {"Yes": 0.9853998781772466, "No": 0.014597737909170266}, "ground_truth": 0}, {"key": "20836172", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9578015852308328, "res": {"Yes": 0.9578015852308328, "No": 0.04219603583127921}, "ground_truth": 1}, {"key": "20836172", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7431814584930927, "res": {"Yes": 0.7431814584930927, "No": 0.25681507644285706}, "ground_truth": 0}, {"key": "20836172", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9679307359851498, "res": {"Yes": 0.9679307359851498, "No": 0.03206735963413413}, "ground_truth": 0}, {"key": "35932467", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012536166403071052, "res": {"No": 0.987461670148181, "Yes": 0.012536166403071052}, "ground_truth": 0}, {"key": "35932467", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909134455313565, "res": {"Yes": 0.9909134455313565, "No": 0.009085114263980967}, "ground_truth": 0}, {"key": "35932467", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946626584929292, "res": {"Yes": 0.9946626584929292, "No": 0.005334408350517898}, "ground_truth": 1}, {"key": "35932467", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9852062646463826, "res": {"Yes": 0.9852062646463826, "No": 0.014792507150894812}, "ground_truth": 0}, {"key": "35932467", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933255934599453, "res": {"Yes": 0.9933255934599453, "No": 0.006673701669877752}, "ground_truth": 0}, {"key": "40758845", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8682151989435071, "res": {"Yes": 0.8682151989435071, "No": 0.13178381027564784}, "ground_truth": 0}, {"key": "40758845", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.994680589438294, "res": {"Yes": 0.994680589438294, "No": 0.005319042356546231}, "ground_truth": 0}, {"key": "40758845", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845098429846655, "res": {"Yes": 0.9845098429846655, "No": 0.015488739545617261}, "ground_truth": 1}, {"key": "40758845", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997337405010708, "res": {"Yes": 0.997337405010708, "No": 0.0026622783506343155}, "ground_truth": 0}, {"key": "40758845", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9911553461247988, "res": {"Yes": 0.9911553461247988, "No": 0.008843734825363993}, "ground_truth": 0}, {"key": "30358490", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04405842874302338, "res": {"No": 0.9559403435712047, "Yes": 0.04405842874302338}, "ground_truth": 0}, {"key": "30358490", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8721410095037571, "res": {"Yes": 0.8721410095037571, "No": 0.12785779617845808}, "ground_truth": 0}, {"key": "30358490", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9669626102452605, "res": {"Yes": 0.9669626102452605, "No": 0.03303643287465619}, "ground_truth": 1}, {"key": "30358490", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9896369789789494, "res": {"Yes": 0.9896369789789494, "No": 0.010362673661626154}, "ground_truth": 0}, {"key": "30358490", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996056281250394, "res": {"Yes": 0.9996056281250394, "No": 0.00039405305514636014}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9971242624776075, "res": {"Yes": 0.9971242624776075, "No": 0.0028752932785645127}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.839629614083796, "res": {"Yes": 0.839629614083796, "No": 0.16036719891567058}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8900707989836566, "res": {"Yes": 0.8900707989836566, "No": 0.10992704065732653}, "ground_truth": 1}, {"key": "34615665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8595316595429336, "res": {"Yes": 0.8595316595429336, "No": 0.14046784185506883}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.22303601908600845, "res": {"No": 0.7769443599457129, "Yes": 0.22303601908600845}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9851110461069387, "res": {"Yes": 0.9851110461069387, "No": 0.014887988977556967}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979209014258587, "res": {"Yes": 0.9979209014258587, "No": 0.0020788745779343055}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996463574334996, "res": {"Yes": 0.996463574334996, "No": 0.0035335190732956726}, "ground_truth": 1}, {"key": "35890902", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928347477063446, "res": {"Yes": 0.9928347477063446, "No": 0.0071646003309821776}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910167070651086, "res": {"Yes": 0.9910167070651086, "No": 0.008982229082692904}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9967222698245976, "res": {"Yes": 0.9967222698245976, "No": 0.003277297923204697}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962310237877742, "res": {"Yes": 0.9962310237877742, "No": 0.0037687194637845457}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971303024764084, "res": {"Yes": 0.9971303024764084, "No": 0.0028690882001046816}, "ground_truth": 1}, {"key": "37922330", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980176613338038, "res": {"Yes": 0.9980176613338038, "No": 0.0019817208412345275}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932170328786092, "res": {"Yes": 0.9932170328786092, "No": 0.006782738839302178}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9982815549204822, "res": {"Yes": 0.9982815549204822, "No": 0.0017174931881814026}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998594016848696, "res": {"Yes": 0.9998594016848696, "No": 0.00014010926156355873}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999026601378318, "res": {"Yes": 0.9999026601378318, "No": 9.709691058611392e-05}, "ground_truth": 1}, {"key": "30844962", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991031131436655, "res": {"Yes": 0.9991031131436655, "No": 0.0008963577052938404}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976798545079824, "res": {"Yes": 0.9976798545079824, "No": 0.0023192556159103987}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0017746176281885197, "res": {"No": 0.9982250119578421, "Yes": 0.0017746176281885197}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99609086105454, "res": {"Yes": 0.99609086105454, "No": 0.003909056259402534}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975539771011853, "res": {"Yes": 0.9975539771011853, "No": 0.0024456532838156918}, "ground_truth": 1}, {"key": "36217333", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990464758933019, "res": {"Yes": 0.9990464758933019, "No": 0.0009534006818657735}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963886511624042, "res": {"Yes": 0.9963886511624042, "No": 0.0036108250032165896}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0035410701332177653, "res": {"No": 0.9964580060124545, "Yes": 0.0035410701332177653}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.993012455611723, "res": {"Yes": 0.993012455611723, "No": 0.006986816240206917}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929134846094629, "res": {"Yes": 0.9929134846094629, "No": 0.0070842873262188235}, "ground_truth": 1}, {"key": "30816523", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9759446746355287, "res": {"Yes": 0.9759446746355287, "No": 0.02405469940652611}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9476831998028378, "res": {"Yes": 0.9476831998028378, "No": 0.05231385651902862}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5963888868405437, "res": {"Yes": 0.5963888868405437, "No": 0.4036061712940639}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.884435090149493, "res": {"Yes": 0.884435090149493, "No": 0.1155621267146362}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7182121500287408, "res": {"Yes": 0.7182121500287408, "No": 0.2817778174411699}, "ground_truth": 1}, {"key": "38900884", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8813894643135132, "res": {"Yes": 0.8813894643135132, "No": 0.11860837255635155}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.865700398880378, "res": {"Yes": 0.865700398880378, "No": 0.13429518884950967}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3960802249453701, "res": {"No": 0.6039179709232603, "Yes": 0.3960802249453701}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960170565439941, "res": {"Yes": 0.9960170565439941, "No": 0.003982848548438723}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956600275178908, "res": {"Yes": 0.9956600275178908, "No": 0.004339934600467792}, "ground_truth": 1}, {"key": "13890581", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9914036844415073, "res": {"Yes": 0.9914036844415073, "No": 0.008595796278949384}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.948669208148497, "res": {"Yes": 0.948669208148497, "No": 0.05132876308560484}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8535845747291605, "res": {"Yes": 0.8535845747291605, "No": 0.14641430067781666}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9683963490596996, "res": {"Yes": 0.9683963490596996, "No": 0.031603345302161974}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941977153524648, "res": {"Yes": 0.9941977153524648, "No": 0.005802039994965378}, "ground_truth": 1}, {"key": "40194700", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967412213829884, "res": {"Yes": 0.9967412213829884, "No": 0.0032587476951409142}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989917546212773, "res": {"Yes": 0.9989917546212773, "No": 0.001007859000070141}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.18019921321390114, "res": {"No": 0.8197985087129319, "Yes": 0.18019921321390114}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9738561793960754, "res": {"Yes": 0.9738561793960754, "No": 0.026142968908935103}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992374798163856, "res": {"Yes": 0.9992374798163856, "No": 0.0007619175577956624}, "ground_truth": 1}, {"key": "37903647", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999579901626043, "res": {"Yes": 0.999579901626043, "No": 0.0004191269921804142}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9518023988199247, "res": {"Yes": 0.9518023988199247, "No": 0.04819689399209294}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9980259743565355, "res": {"Yes": 0.9980259743565355, "No": 0.001973935526207381}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927555514445098, "res": {"Yes": 0.9927555514445098, "No": 0.007244082864307206}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992643776728689, "res": {"Yes": 0.9992643776728689, "No": 0.0007353080727280848}, "ground_truth": 1}, {"key": "13291223", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997676429990703, "res": {"Yes": 0.9997676429990703, "No": 0.00023226351276086762}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998853774396677, "res": {"Yes": 0.9998853774396677, "No": 0.00011439387736357138}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9327095912359545, "res": {"Yes": 0.9327095912359545, "No": 0.06728900692683276}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891694869494746, "res": {"Yes": 0.9891694869494746, "No": 0.010830126906350391}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905695348903911, "res": {"Yes": 0.9905695348903911, "No": 0.009430246902667977}, "ground_truth": 1}, {"key": "36052570", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8738300963440367, "res": {"Yes": 0.8738300963440367, "No": 0.1261674356261711}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.982376368248725, "res": {"Yes": 0.982376368248725, "No": 0.01761867253232106}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9840021863814344, "res": {"Yes": 0.9840021863814344, "No": 0.01599728273863849}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.864123068437984, "res": {"Yes": 0.864123068437984, "No": 0.13587609156173577}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9453210724378376, "res": {"Yes": 0.9453210724378376, "No": 0.054677373625547566}, "ground_truth": 1}, {"key": "34944735", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9620551706623633, "res": {"Yes": 0.9620551706623633, "No": 0.03794185586058056}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8016720098812986, "res": {"Yes": 0.8016720098812986, "No": 0.19831876868555412}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9984181859886547, "res": {"Yes": 0.9984181859886547, "No": 0.0015809009072189054}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981747598804558, "res": {"Yes": 0.9981747598804558, "No": 0.0018245899538765784}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974064271367604, "res": {"Yes": 0.9974064271367604, "No": 0.0025927427156646667}, "ground_truth": 1}, {"key": "32159602", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966818906325672, "res": {"Yes": 0.9966818906325672, "No": 0.0033168703709131556}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981894888562464, "res": {"Yes": 0.9981894888562464, "No": 0.0018089820413664246}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4677137572471923, "res": {"No": 0.532282699894202, "Yes": 0.4677137572471923}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8279979752086901, "res": {"Yes": 0.8279979752086901, "No": 0.17198921631107414}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9435706596374801, "res": {"Yes": 0.9435706596374801, "No": 0.056416060521237335}, "ground_truth": 1}, {"key": "34988915", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9588425049506494, "res": {"Yes": 0.9588425049506494, "No": 0.04114908038427613}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7770217940791039, "res": {"Yes": 0.7770217940791039, "No": 0.2229527529832306}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9293347512831791, "res": {"Yes": 0.9293347512831791, "No": 0.07066442607265003}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.0069907600132421725, "res": {"No": 0.9930084567586159, "Yes": 0.0069907600132421725}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965892975051219, "res": {"Yes": 0.9965892975051219, "No": 0.0034102693586495537}, "ground_truth": 1}, {"key": "37889203", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971051771036025, "res": {"Yes": 0.9971051771036025, "No": 0.0028943086637959366}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959891427603256, "res": {"Yes": 0.9959891427603256, "No": 0.004010577632630703}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5930601213782568, "res": {"Yes": 0.5930601213782568, "No": 0.40692495349033686}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.649339815951109, "res": {"Yes": 0.649339815951109, "No": 0.350654840405734}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963456905998305, "res": {"Yes": 0.9963456905998305, "No": 0.003652351763882882}, "ground_truth": 1}, {"key": "33609927", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963016743147675, "res": {"Yes": 0.9963016743147675, "No": 0.0036975697480028736}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988185673417251, "res": {"Yes": 0.9988185673417251, "No": 0.00117996913219913}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8132528818611271, "res": {"Yes": 0.8132528818611271, "No": 0.18674140128846553}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9775518256111676, "res": {"Yes": 0.9775518256111676, "No": 0.022444497389091853}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9728524749965423, "res": {"Yes": 0.9728524749965423, "No": 0.02714456057010993}, "ground_truth": 1}, {"key": "33578778", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789083213850399, "res": {"Yes": 0.9789083213850399, "No": 0.021090486857544166}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9767662962788088, "res": {"Yes": 0.9767662962788088, "No": 0.023232193763581134}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9263353322773628, "res": {"Yes": 0.9263353322773628, "No": 0.07366253158782045}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971203475760135, "res": {"Yes": 0.9971203475760135, "No": 0.002877400490197765}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967087726020242, "res": {"Yes": 0.9967087726020242, "No": 0.0032895648085194394}, "ground_truth": 1}, {"key": "36888270", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894837099357442, "res": {"Yes": 0.9894837099357442, "No": 0.010515362456482762}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966637716188342, "res": {"Yes": 0.9966637716188342, "No": 0.003334380746714091}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9515640097919582, "res": {"Yes": 0.9515640097919582, "No": 0.048435275407220015}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9362158688332443, "res": {"Yes": 0.9362158688332443, "No": 0.06378080474286325}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.949419408654338, "res": {"Yes": 0.949419408654338, "No": 0.05057988051704641}, "ground_truth": 1}, {"key": "36846007", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944190516789391, "res": {"Yes": 0.9944190516789391, "No": 0.005580242749726242}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9581897545330058, "res": {"Yes": 0.9581897545330058, "No": 0.04180832903877864}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8462910396820674, "res": {"Yes": 0.8462910396820674, "No": 0.1537065367598552}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937893717771829, "res": {"Yes": 0.9937893717771829, "No": 0.00620962978969373}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861199101949032, "res": {"Yes": 0.9861199101949032, "No": 0.013879209194309005}, "ground_truth": 1}, {"key": "31723471", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923177410556061, "res": {"Yes": 0.9923177410556061, "No": 0.00768184904295004}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9501683210779831, "res": {"Yes": 0.9501683210779831, "No": 0.04983080060953565}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.13439909292309196, "res": {"No": 0.8656005374424816, "Yes": 0.13439909292309196}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923266684823212, "res": {"Yes": 0.9923266684823212, "No": 0.007673073705554648}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931061503596665, "res": {"Yes": 0.9931061503596665, "No": 0.006893433411542195}, "ground_truth": 1}, {"key": "15921828", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999782301739706, "res": {"Yes": 0.999782301739706, "No": 0.0002175075489982313}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983685231077084, "res": {"Yes": 0.9983685231077084, "No": 0.001631034178522967}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9158957507588465, "res": {"Yes": 0.9158957507588465, "No": 0.0841014811825153}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.993078175648601, "res": {"Yes": 0.993078175648601, "No": 0.0069210346820651044}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957489036005754, "res": {"Yes": 0.9957489036005754, "No": 0.004250287292904522}, "ground_truth": 1}, {"key": "39109408", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991444459235016, "res": {"Yes": 0.991444459235016, "No": 0.008554541221664795}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970583681027145, "res": {"Yes": 0.9970583681027145, "No": 0.0029411194641118946}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9109396826118027, "res": {"Yes": 0.9109396826118027, "No": 0.08905503763388406}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998843047282142, "res": {"Yes": 0.9998843047282142, "No": 0.0001150020044215859}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978864743473722, "res": {"Yes": 0.9978864743473722, "No": 0.002112707455009089}, "ground_truth": 1}, {"key": "20936833", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925831098921665, "res": {"Yes": 0.9925831098921665, "No": 0.007414866556354241}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743826383213013, "res": {"Yes": 0.9743826383213013, "No": 0.02561490167934143}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0009132270706706744, "res": {"No": 0.99908431799287, "Yes": 0.0009132270706706744}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8562282167682227, "res": {"Yes": 0.8562282167682227, "No": 0.14376854484615775}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9853797284338545, "res": {"Yes": 0.9853797284338545, "No": 0.014619953576610289}, "ground_truth": 1}, {"key": "36832879", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8067130904587388, "res": {"Yes": 0.8067130904587388, "No": 0.19327979980757726}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995637576135235, "res": {"Yes": 0.995637576135235, "No": 0.004361127299977839}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6057153637724644, "res": {"Yes": 0.6057153637724644, "No": 0.39427583334387817}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8377085678906174, "res": {"Yes": 0.8377085678906174, "No": 0.16228325222827883}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9530746622100521, "res": {"Yes": 0.9530746622100521, "No": 0.04692194981034887}, "ground_truth": 1}, {"key": "14958201", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8527610241532622, "res": {"Yes": 0.8527610241532622, "No": 0.14723425340434088}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.819627483794014, "res": {"Yes": 0.819627483794014, "No": 0.18036310680517811}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9436191623030832, "res": {"Yes": 0.9436191623030832, "No": 0.056379593687502186}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988930227518525, "res": {"Yes": 0.9988930227518525, "No": 0.0011063175515039936}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972253673283312, "res": {"Yes": 0.9972253673283312, "No": 0.0027730929312995436}, "ground_truth": 1}, {"key": "34352262", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855159843378442, "res": {"Yes": 0.9855159843378442, "No": 0.014482807828714729}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946158504712301, "res": {"Yes": 0.9946158504712301, "No": 0.005383565995161731}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9572828539818072, "res": {"Yes": 0.9572828539818072, "No": 0.04271492876760938}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9938352888882731, "res": {"Yes": 0.9938352888882731, "No": 0.0061625310566953955}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9917768882815894, "res": {"Yes": 0.9917768882815894, "No": 0.008221830550490172}, "ground_truth": 1}, {"key": "39805395", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998085094275899, "res": {"Yes": 0.998085094275899, "No": 0.0019146163803385097}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9823585422088829, "res": {"Yes": 0.9823585422088829, "No": 0.01763866343967489}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9798947059481239, "res": {"Yes": 0.9798947059481239, "No": 0.020104041791049403}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996520819875135, "res": {"Yes": 0.9996520819875135, "No": 0.0003478118101126463}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995479799918402, "res": {"Yes": 0.9995479799918402, "No": 0.000451654523012145}, "ground_truth": 1}, {"key": "34303109", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995502438206463, "res": {"Yes": 0.9995502438206463, "No": 0.000449255135990405}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998055380101843, "res": {"Yes": 0.9998055380101843, "No": 0.00019384997942230732}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9720101744843411, "res": {"Yes": 0.9720101744843411, "No": 0.027989341555723985}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9741376693752483, "res": {"Yes": 0.9741376693752483, "No": 0.025861246627856695}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9674507304252186, "res": {"Yes": 0.9674507304252186, "No": 0.03254799531789653}, "ground_truth": 1}, {"key": "39939090", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9795394154788182, "res": {"Yes": 0.9795394154788182, "No": 0.020459382494982786}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7017946072723344, "res": {"Yes": 0.7017946072723344, "No": 0.29820437660959387}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9891312335375977, "res": {"Yes": 0.9891312335375977, "No": 0.010866745768437658}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949775562605567, "res": {"Yes": 0.9949775562605567, "No": 0.005021814281069275}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997271013472219, "res": {"Yes": 0.997271013472219, "No": 0.0027287572314702866}, "ground_truth": 1}, {"key": "29347771", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9908407562735826, "res": {"Yes": 0.9908407562735826, "No": 0.009158947579174835}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9843898090175299, "res": {"Yes": 0.9843898090175299, "No": 0.015608311783236024}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9812852470206961, "res": {"Yes": 0.9812852470206961, "No": 0.01871143770723982}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639934586764076, "res": {"Yes": 0.9639934586764076, "No": 0.03600200671486412}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8782271287674527, "res": {"Yes": 0.8782271287674527, "No": 0.12176888696192333}, "ground_truth": 1}, {"key": "36783415", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9742770648103383, "res": {"Yes": 0.9742770648103383, "No": 0.02572050616461966}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6973184262252345, "res": {"Yes": 0.6973184262252345, "No": 0.3026574624254388}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0465350075081652, "res": {"No": 0.9534573737262094, "Yes": 0.0465350075081652}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.810824636049274, "res": {"Yes": 0.810824636049274, "No": 0.18915836589287272}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9260123106369275, "res": {"Yes": 0.9260123106369275, "No": 0.07398400187714274}, "ground_truth": 1}, {"key": "37935687", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9119640669118249, "res": {"Yes": 0.9119640669118249, "No": 0.08803149771196628}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.015383871862515662, "res": {"No": 0.9846119970873931, "Yes": 0.015383871862515662}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9026240199400476, "res": {"Yes": 0.9026240199400476, "No": 0.09737561100971248}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979737284655261, "res": {"Yes": 0.9979737284655261, "No": 0.002025687763076002}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993985351025728, "res": {"Yes": 0.9993985351025728, "No": 0.0006011770947677217}, "ground_truth": 1}, {"key": "40260829", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901926864640859, "res": {"Yes": 0.9901926864640859, "No": 0.009806812321039}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.97536241632471, "res": {"Yes": 0.97536241632471, "No": 0.024637409001643063}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.78600090119112, "res": {"Yes": 0.78600090119112, "No": 0.21399723170540655}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.530140878110994, "res": {"Yes": 0.530140878110994, "No": 0.46985798007208385}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6421794225249022, "res": {"Yes": 0.6421794225249022, "No": 0.35781957502475953}, "ground_truth": 1}, {"key": "36478199", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9833617779558387, "res": {"Yes": 0.9833617779558387, "No": 0.01663710228965694}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.932649442733925, "res": {"Yes": 0.932649442733925, "No": 0.06734962796809775}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1523138780683083, "res": {"No": 0.8476773068388213, "Yes": 0.1523138780683083}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.520625581049598, "res": {"Yes": 0.520625581049598, "No": 0.4793703533738282}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28222058429549374, "res": {"No": 0.717775523630144, "Yes": 0.28222058429549374}, "ground_truth": 1}, {"key": "34541803", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.584956595634198, "res": {"Yes": 0.584956595634198, "No": 0.4150400862677572}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.0069459119576481135, "res": {"No": 0.9930419630180347, "Yes": 0.0069459119576481135}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9448239548563961, "res": {"Yes": 0.9448239548563961, "No": 0.05517395868318692}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9779746495390355, "res": {"Yes": 0.9779746495390355, "No": 0.022024771098886134}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957681751130422, "res": {"Yes": 0.9957681751130422, "No": 0.004231439654391479}, "ground_truth": 1}, {"key": "35360841", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982682490141416, "res": {"Yes": 0.9982682490141416, "No": 0.0017311255068758846}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956912295028968, "res": {"Yes": 0.9956912295028968, "No": 0.004307887647480982}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0018183143288767248, "res": {"No": 0.9981814122383104, "Yes": 0.0018183143288767248}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981213201323695, "res": {"Yes": 0.9981213201323695, "No": 0.0018785768189777115}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9669051095894298, "res": {"Yes": 0.9669051095894298, "No": 0.033093876953840515}, "ground_truth": 1}, {"key": "35550407", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956638108340513, "res": {"Yes": 0.9956638108340513, "No": 0.004336095457417753}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.39454313647385403, "res": {"No": 0.6054558340121753, "Yes": 0.39454313647385403}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9756724747869951, "res": {"Yes": 0.9756724747869951, "No": 0.02432700570107222}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.868481816667249, "res": {"Yes": 0.868481816667249, "No": 0.1315168892829076}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955405722081356, "res": {"Yes": 0.9955405722081356, "No": 0.00445922943633136}, "ground_truth": 1}, {"key": "37561590", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9900094445000159, "res": {"Yes": 0.9900094445000159, "No": 0.009989996866354026}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9604348772087641, "res": {"Yes": 0.9604348772087641, "No": 0.03956347637806118}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00671049875940591, "res": {"No": 0.9932890124352501, "Yes": 0.00671049875940591}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901193970942189, "res": {"Yes": 0.9901193970942189, "No": 0.009880284224245077}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9828394104167095, "res": {"Yes": 0.9828394104167095, "No": 0.017160353240902725}, "ground_truth": 1}, {"key": "39328843", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9776173306209689, "res": {"Yes": 0.9776173306209689, "No": 0.022382302485514587}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9011548063353828, "res": {"Yes": 0.9011548063353828, "No": 0.0988449023742258}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.614304429617939, "res": {"Yes": 0.614304429617939, "No": 0.38569100703414166}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.01903300312667469, "res": {"No": 0.9809665724223906, "Yes": 0.01903300312667469}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8845361859733277, "res": {"Yes": 0.8845361859733277, "No": 0.11546163138620108}, "ground_truth": 1}, {"key": "35389665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9118188481975162, "res": {"Yes": 0.9118188481975162, "No": 0.08817946202007691}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7715927839923967, "res": {"Yes": 0.7715927839923967, "No": 0.2284045454793745}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.020904985662804205, "res": {"No": 0.9790919191510564, "Yes": 0.020904985662804205}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7442921994912676, "res": {"Yes": 0.7442921994912676, "No": 0.2557060593141461}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.80099899227357, "res": {"Yes": 0.80099899227357, "No": 0.1989984188035532}, "ground_truth": 1}, {"key": "33080187", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7850394952853063, "res": {"Yes": 0.7850394952853063, "No": 0.21495871339836714}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9578870115323717, "res": {"Yes": 0.9578870115323717, "No": 0.04211166913907698}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.129298706127793, "res": {"No": 0.870700751542911, "Yes": 0.129298706127793}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8730368877727396, "res": {"Yes": 0.8730368877727396, "No": 0.12696162544932044}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9635746733030749, "res": {"Yes": 0.9635746733030749, "No": 0.0364240320877244}, "ground_truth": 1}, {"key": "38636995", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998822784997197, "res": {"Yes": 0.9998822784997197, "No": 0.00011751847531700427}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990958555851012, "res": {"Yes": 0.9990958555851012, "No": 0.0009040815422825789}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9870915904949149, "res": {"Yes": 0.9870915904949149, "No": 0.012906985532927222}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9715368129354065, "res": {"Yes": 0.9715368129354065, "No": 0.028462805904989105}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985536673486816, "res": {"Yes": 0.9985536673486816, "No": 0.0014455766084821046}, "ground_truth": 1}, {"key": "18536236", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9657371888662957, "res": {"Yes": 0.9657371888662957, "No": 0.034262627366051866}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979155594697793, "res": {"Yes": 0.9979155594697793, "No": 0.0020839453063273973}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9951574329194491, "res": {"Yes": 0.9951574329194491, "No": 0.0048419633000367675}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986030006133944, "res": {"Yes": 0.9986030006133944, "No": 0.0013953716268655452}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979092658357752, "res": {"Yes": 0.9979092658357752, "No": 0.0020892573731491016}, "ground_truth": 1}, {"key": "36289151", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991821305377011, "res": {"Yes": 0.9991821305377011, "No": 0.0008175757491293884}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 6.0397641608664064e-05, "res": {"No": 0.9999384183962248, "Yes": 6.0397641608664064e-05}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9680613118726076, "res": {"Yes": 0.9680613118726076, "No": 0.03193811994541108}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9653028912009708, "res": {"Yes": 0.9653028912009708, "No": 0.03469349755274177}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.07881854241218579, "res": {"No": 0.9211804625091868, "Yes": 0.07881854241218579}, "ground_truth": 1}, {"key": "23017045", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9253664201653163, "res": {"Yes": 0.9253664201653163, "No": 0.07463316496412188}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 8.00275986226616e-05, "res": {"No": 0.9999193471666147, "Yes": 8.00275986226616e-05}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8716090785558942, "res": {"Yes": 0.8716090785558942, "No": 0.12838909772692883}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7142221504150045, "res": {"Yes": 0.7142221504150045, "No": 0.2857652812633764}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9315557360514793, "res": {"Yes": 0.9315557360514793, "No": 0.0684421921654427}, "ground_truth": 1}, {"key": "36418082", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9449673210365799, "res": {"Yes": 0.9449673210365799, "No": 0.055031193498655345}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9624778221574543, "res": {"Yes": 0.9624778221574543, "No": 0.037520013313753486}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7588280616419352, "res": {"Yes": 0.7588280616419352, "No": 0.241169662356004}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8138632700501417, "res": {"Yes": 0.8138632700501417, "No": 0.1861359422340313}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954986302704779, "res": {"Yes": 0.9954986302704779, "No": 0.004498731548734276}, "ground_truth": 1}, {"key": "34396551", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9804201506900284, "res": {"Yes": 0.9804201506900284, "No": 0.01957928224716177}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9693009067646532, "res": {"Yes": 0.9693009067646532, "No": 0.030698008383346232}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8948996552786125, "res": {"Yes": 0.8948996552786125, "No": 0.10509673155635199}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9407260267109193, "res": {"Yes": 0.9407260267109193, "No": 0.05926951417661137}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9622149582049571, "res": {"Yes": 0.9622149582049571, "No": 0.03778262266184095}, "ground_truth": 1}, {"key": "39720944", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9217412359208864, "res": {"Yes": 0.9217412359208864, "No": 0.07825667106256522}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9201955335999988, "res": {"Yes": 0.9201955335999988, "No": 0.07979986230732346}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9880775286691483, "res": {"Yes": 0.9880775286691483, "No": 0.011921390159808408}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9643531936685762, "res": {"Yes": 0.9643531936685762, "No": 0.035644629094764915}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9509582010799587, "res": {"Yes": 0.9509582010799587, "No": 0.04903596355397659}, "ground_truth": 1}, {"key": "35884842", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9422508450430561, "res": {"Yes": 0.9422508450430561, "No": 0.05774623075626993}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.49907926906758926, "res": {"No": 0.5009083944623792, "Yes": 0.49907926906758926}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.977783938630311, "res": {"Yes": 0.977783938630311, "No": 0.02221241304936931}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9569294846454737, "res": {"Yes": 0.9569294846454737, "No": 0.043067384865383584}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9188176210689871, "res": {"Yes": 0.9188176210689871, "No": 0.08118033832415004}, "ground_truth": 1}, {"key": "35403375", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9640445420432248, "res": {"Yes": 0.9640445420432248, "No": 0.03595217125916791}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9856022404713067, "res": {"Yes": 0.9856022404713067, "No": 0.014393076307593459}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.16633539120521829, "res": {"No": 0.8336637628211714, "Yes": 0.16633539120521829}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960075949249058, "res": {"Yes": 0.9960075949249058, "No": 0.003992244399848483}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9826865160862471, "res": {"Yes": 0.9826865160862471, "No": 0.017313310157991698}, "ground_truth": 1}, {"key": "26341324", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982121790592221, "res": {"Yes": 0.9982121790592221, "No": 0.0017877695456415422}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9919187929437516, "res": {"Yes": 0.9919187929437516, "No": 0.008080278008891615}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7432572594354477, "res": {"Yes": 0.7432572594354477, "No": 0.25674135541756987}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9161002931162953, "res": {"Yes": 0.9161002931162953, "No": 0.08389882251446586}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944422837670834, "res": {"Yes": 0.9944422837670834, "No": 0.005557197747609321}, "ground_truth": 1}, {"key": "19212345", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970401200077453, "res": {"Yes": 0.9970401200077453, "No": 0.0029594761578712646}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9823140187993129, "res": {"Yes": 0.9823140187993129, "No": 0.01768556515663232}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.905519741617124, "res": {"Yes": 0.905519741617124, "No": 0.09447788434183747}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887144500342505, "res": {"Yes": 0.9887144500342505, "No": 0.011285175694289787}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963169398656013, "res": {"Yes": 0.9963169398656013, "No": 0.003681243780770366}, "ground_truth": 1}, {"key": "30548367", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9868125544720807, "res": {"Yes": 0.9868125544720807, "No": 0.013186825710637538}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907593090431438, "res": {"Yes": 0.9907593090431438, "No": 0.009237632029831987}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3229702387814957, "res": {"No": 0.6770266107322748, "Yes": 0.3229702387814957}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9713677248682651, "res": {"Yes": 0.9713677248682651, "No": 0.028628687627218506}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984948438853906, "res": {"Yes": 0.9984948438853906, "No": 0.001504886197378741}, "ground_truth": 1}, {"key": "37919402", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911253651201839, "res": {"Yes": 0.9911253651201839, "No": 0.00887388132014318}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9575476204620669, "res": {"Yes": 0.9575476204620669, "No": 0.04244974811198927}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9818910135027109, "res": {"Yes": 0.9818910135027109, "No": 0.01810835943665505}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9845145804574283, "res": {"Yes": 0.9845145804574283, "No": 0.015485208515553585}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9760742457472356, "res": {"Yes": 0.9760742457472356, "No": 0.023925190004566096}, "ground_truth": 1}, {"key": "39995133", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9866935811614617, "res": {"Yes": 0.9866935811614617, "No": 0.013305896448619377}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.950448662080966, "res": {"Yes": 0.950448662080966, "No": 0.049550526134907534}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9550149014601007, "res": {"Yes": 0.9550149014601007, "No": 0.04498400485792303}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816034253830399, "res": {"Yes": 0.9816034253830399, "No": 0.01839613299076556}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9785281735204531, "res": {"Yes": 0.9785281735204531, "No": 0.021471225265414208}, "ground_truth": 1}, {"key": "40249088", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972133978038044, "res": {"Yes": 0.9972133978038044, "No": 0.002786431037638604}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9887015088355078, "res": {"Yes": 0.9887015088355078, "No": 0.011298164500970868}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.995537497684934, "res": {"Yes": 0.995537497684934, "No": 0.004461820109046515}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9400824291289193, "res": {"Yes": 0.9400824291289193, "No": 0.059916856665659275}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999800413250086, "res": {"Yes": 0.999800413250086, "No": 0.00019936544706113622}, "ground_truth": 1}, {"key": "40254388", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980687087475996, "res": {"Yes": 0.9980687087475996, "No": 0.0019310816357851384}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922338307422353, "res": {"Yes": 0.9922338307422353, "No": 0.0077654909373111956}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9414248966078171, "res": {"Yes": 0.9414248966078171, "No": 0.05857111043215397}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962751704523837, "res": {"Yes": 0.9962751704523837, "No": 0.003724850359141808}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945736332066081, "res": {"Yes": 0.9945736332066081, "No": 0.005426283595919492}, "ground_truth": 1}, {"key": "31995230", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988398577859218, "res": {"Yes": 0.9988398577859218, "No": 0.001159888962996738}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952288721632385, "res": {"Yes": 0.9952288721632385, "No": 0.004770858553626986}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.75235494880699, "res": {"Yes": 0.75235494880699, "No": 0.24764267460843833}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8009183918004583, "res": {"Yes": 0.8009183918004583, "No": 0.19907480905504157}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9759114081763846, "res": {"Yes": 0.9759114081763846, "No": 0.024085554898297466}, "ground_truth": 1}, {"key": "38632129", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.974968145305064, "res": {"Yes": 0.974968145305064, "No": 0.02502921090510626}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5342205181112434, "res": {"Yes": 0.5342205181112434, "No": 0.46577237493476564}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9333190548303749, "res": {"Yes": 0.9333190548303749, "No": 0.06667863448089979}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9274636431768465, "res": {"Yes": 0.9274636431768465, "No": 0.07253574530032085}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7350143030696026, "res": {"Yes": 0.7350143030696026, "No": 0.2649846709872904}, "ground_truth": 1}, {"key": "35720795", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9741805479625685, "res": {"Yes": 0.9741805479625685, "No": 0.025818324170199858}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951679467137508, "res": {"Yes": 0.9951679467137508, "No": 0.004830631859990594}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.11810979748385202, "res": {"No": 0.8818864083395459, "Yes": 0.11810979748385202}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.771885659023444, "res": {"Yes": 0.771885659023444, "No": 0.22811326433035298}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7740357947613903, "res": {"Yes": 0.7740357947613903, "No": 0.22596297964685164}, "ground_truth": 1}, {"key": "23906759", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2277893911383908, "res": {"No": 0.7722083128951199, "Yes": 0.2277893911383908}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.23685193024355666, "res": {"No": 0.763142463641733, "Yes": 0.23685193024355666}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03713750884590885, "res": {"No": 0.9628369819287311, "Yes": 0.03713750884590885}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9765808328061746, "res": {"Yes": 0.9765808328061746, "No": 0.02341879907222185}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9918450217269276, "res": {"Yes": 0.9918450217269276, "No": 0.008154706775195983}, "ground_truth": 1}, {"key": "19410108", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901158960382207, "res": {"Yes": 0.9901158960382207, "No": 0.009883485954986103}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926192832575692, "res": {"Yes": 0.9926192832575692, "No": 0.007379975205970445}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6609909702269053, "res": {"Yes": 0.6609909702269053, "No": 0.33900785526398103}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7116569040814148, "res": {"Yes": 0.7116569040814148, "No": 0.2883349943452469}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9348301125593325, "res": {"Yes": 0.9348301125593325, "No": 0.06516783890986111}, "ground_truth": 1}, {"key": "30745137", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9479554286731245, "res": {"Yes": 0.9479554286731245, "No": 0.05204122947710414}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941218526100416, "res": {"Yes": 0.9941218526100416, "No": 0.005877849589696683}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1255838017780794, "res": {"No": 0.8744154089173288, "Yes": 0.1255838017780794}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909619088248558, "res": {"Yes": 0.9909619088248558, "No": 0.009037546918278223}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966611653464792, "res": {"Yes": 0.9966611653464792, "No": 0.003338112671893982}, "ground_truth": 1}, {"key": "26553115", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996664718250534, "res": {"Yes": 0.996664718250534, "No": 0.003334765222978818}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990845561745998, "res": {"Yes": 0.9990845561745998, "No": 0.0009151432020149505}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7145069889961094, "res": {"Yes": 0.7145069889961094, "No": 0.28549016466188887}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7160448459317689, "res": {"Yes": 0.7160448459317689, "No": 0.28394966285463674}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975123385708835, "res": {"Yes": 0.9975123385708835, "No": 0.002487087330451342}, "ground_truth": 1}, {"key": "37872311", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9765967531566774, "res": {"Yes": 0.9765967531566774, "No": 0.023401681505285384}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9444300642916512, "res": {"Yes": 0.9444300642916512, "No": 0.05556866587424549}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9937620595410585, "res": {"Yes": 0.9937620595410585, "No": 0.006237899948251106}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987589918882529, "res": {"Yes": 0.9987589918882529, "No": 0.0012409984769956813}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965849167075165, "res": {"Yes": 0.9965849167075165, "No": 0.0034150709847890364}, "ground_truth": 1}, {"key": "35553131", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953849825709017, "res": {"Yes": 0.9953849825709017, "No": 0.004614947901685158}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9939747207400643, "res": {"Yes": 0.9939747207400643, "No": 0.0060251693025012995}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5373141748311813, "res": {"Yes": 0.5373141748311813, "No": 0.4626851674958546}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9754025708712007, "res": {"Yes": 0.9754025708712007, "No": 0.024596635836731356}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9910045255626131, "res": {"Yes": 0.9910045255626131, "No": 0.008995078361000193}, "ground_truth": 1}, {"key": "39038936", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988623358311183, "res": {"Yes": 0.9988623358311183, "No": 0.0011374108316159319}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997927857718344, "res": {"Yes": 0.9997927857718344, "No": 0.00020666839618317459}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3568209370232459, "res": {"No": 0.6431634679961974, "Yes": 0.3568209370232459}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9598097032028081, "res": {"Yes": 0.9598097032028081, "No": 0.040188556020180954}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937432226616212, "res": {"Yes": 0.9937432226616212, "No": 0.0062557014441482145}, "ground_truth": 1}, {"key": "38735486", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9896701412800927, "res": {"Yes": 0.9896701412800927, "No": 0.010329198380241508}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991336958083327, "res": {"Yes": 0.9991336958083327, "No": 0.000864251278367171}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07124193921913578, "res": {"No": 0.9287560024843655, "Yes": 0.07124193921913578}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639849370118988, "res": {"Yes": 0.9639849370118988, "No": 0.03601418825412696}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9491940147628359, "res": {"Yes": 0.9491940147628359, "No": 0.05080531011691675}, "ground_truth": 1}, {"key": "17087845", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8798268079738204, "res": {"Yes": 0.8798268079738204, "No": 0.12017178718861102}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.942429738923549, "res": {"Yes": 0.942429738923549, "No": 0.0575686805276707}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5699729758881551, "res": {"Yes": 0.5699729758881551, "No": 0.43002486774569504}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906964716313563, "res": {"Yes": 0.9906964716313563, "No": 0.009302429280277691}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9781289391653781, "res": {"Yes": 0.9781289391653781, "No": 0.021868664752460642}, "ground_truth": 1}, {"key": "37443011", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949974950145798, "res": {"Yes": 0.9949974950145798, "No": 0.005001878685597756}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7663555288805429, "res": {"Yes": 0.7663555288805429, "No": 0.23364142750037592}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.968474831079876, "res": {"Yes": 0.968474831079876, "No": 0.03152402580211246}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9455276760255162, "res": {"Yes": 0.9455276760255162, "No": 0.054471535399479104}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895169710001855, "res": {"Yes": 0.9895169710001855, "No": 0.010482233526909185}, "ground_truth": 1}, {"key": "36855749", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9723798555231219, "res": {"Yes": 0.9723798555231219, "No": 0.02761872484168399}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9710236593740159, "res": {"Yes": 0.9710236593740159, "No": 0.028975344507737467}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9894120462048627, "res": {"Yes": 0.9894120462048627, "No": 0.010585999000113614}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.948809113968629, "res": {"Yes": 0.948809113968629, "No": 0.051188289182651496}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9856132428105265, "res": {"Yes": 0.9856132428105265, "No": 0.014384680397313013}, "ground_truth": 1}, {"key": "35613141", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9119877691659334, "res": {"Yes": 0.9119877691659334, "No": 0.08800792603326765}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.985210303015138, "res": {"Yes": 0.985210303015138, "No": 0.01478884054286695}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9857613758418425, "res": {"Yes": 0.9857613758418425, "No": 0.014237846690319281}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99718779118834, "res": {"Yes": 0.99718779118834, "No": 0.0028117393538352967}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993894958335514, "res": {"Yes": 0.9993894958335514, "No": 0.0006101797329595562}, "ground_truth": 1}, {"key": "39088847", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975977432481038, "res": {"Yes": 0.9975977432481038, "No": 0.0024020169469488595}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826192878879839, "res": {"Yes": 0.9826192878879839, "No": 0.017379759447602932}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8053388568078841, "res": {"Yes": 0.8053388568078841, "No": 0.19466055127474508}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9893744611339468, "res": {"Yes": 0.9893744611339468, "No": 0.010624147528523682}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9837535171589904, "res": {"Yes": 0.9837535171589904, "No": 0.016245472896576085}, "ground_truth": 1}, {"key": "33197277", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9753389513379594, "res": {"Yes": 0.9753389513379594, "No": 0.024659757870713558}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8686393648723768, "res": {"Yes": 0.8686393648723768, "No": 0.13135972851568112}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9934900551051031, "res": {"Yes": 0.9934900551051031, "No": 0.0065096473157322185}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9776806832542453, "res": {"Yes": 0.9776806832542453, "No": 0.02231877559117362}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971485561108006, "res": {"Yes": 0.9971485561108006, "No": 0.0028509537259416534}, "ground_truth": 1}, {"key": "33815489", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996654882912624, "res": {"Yes": 0.996654882912624, "No": 0.003345038753393994}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967008342469469, "res": {"Yes": 0.9967008342469469, "No": 0.003298901647507163}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.261376887456679, "res": {"No": 0.7386186842782528, "Yes": 0.261376887456679}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964757613583275, "res": {"Yes": 0.9964757613583275, "No": 0.0035236954370422495}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875302527301714, "res": {"Yes": 0.9875302527301714, "No": 0.012468407923189609}, "ground_truth": 1}, {"key": "35862754", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9572439622840669, "res": {"Yes": 0.9572439622840669, "No": 0.04275394958130844}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821201222746991, "res": {"Yes": 0.9821201222746991, "No": 0.017878632397458568}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.013541616928221015, "res": {"No": 0.9864578167013708, "Yes": 0.013541616928221015}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936637661302515, "res": {"Yes": 0.9936637661302515, "No": 0.006335940596920464}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998950318795388, "res": {"Yes": 0.9998950318795388, "No": 0.00010482272164368344}, "ground_truth": 1}, {"key": "36080615", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999176770779259, "res": {"Yes": 0.999176770779259, "No": 0.0008228373561205875}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962140001382457, "res": {"Yes": 0.9962140001382457, "No": 0.0037857297884174228}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8085962171964907, "res": {"Yes": 0.8085962171964907, "No": 0.1914016460400503}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.941642582973622, "res": {"Yes": 0.941642582973622, "No": 0.05835469854572404}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938356371282193, "res": {"Yes": 0.9938356371282193, "No": 0.006164035514137894}, "ground_truth": 1}, {"key": "22822742", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975967955306979, "res": {"Yes": 0.9975967955306979, "No": 0.0024027662528828004}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9721656317535645, "res": {"Yes": 0.9721656317535645, "No": 0.027832502125329072}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7736583740688608, "res": {"Yes": 0.7736583740688608, "No": 0.2263340440418368}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.23363855372849138, "res": {"No": 0.7663489995592518, "Yes": 0.23363855372849138}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5900776326356312, "res": {"Yes": 0.5900776326356312, "No": 0.40991685193871097}, "ground_truth": 1}, {"key": "39747536", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7640776567655738, "res": {"Yes": 0.7640776567655738, "No": 0.23591776218558286}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8843936854055223, "res": {"Yes": 0.8843936854055223, "No": 0.11560077715180248}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9845158514665721, "res": {"Yes": 0.9845158514665721, "No": 0.015483211052494212}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865593225140424, "res": {"Yes": 0.9865593225140424, "No": 0.013439569172748436}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8770206857008815, "res": {"Yes": 0.8770206857008815, "No": 0.12297822153227865}, "ground_truth": 1}, {"key": "34218396", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855335808828372, "res": {"Yes": 0.9855335808828372, "No": 0.014465116307081754}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.990024983810209, "res": {"Yes": 0.990024983810209, "No": 0.009974121402852594}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9830612387132146, "res": {"Yes": 0.9830612387132146, "No": 0.01693852611062706}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984250777936532, "res": {"Yes": 0.9984250777936532, "No": 0.0015747007889845952}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996693569744574, "res": {"Yes": 0.9996693569744574, "No": 0.0003304725361524292}, "ground_truth": 1}, {"key": "39150388", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999946523779904, "res": {"Yes": 0.999946523779904, "No": 5.3340535494171054e-05}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.16342220700618237, "res": {"No": 0.8365760568289098, "Yes": 0.16342220700618237}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9984818884988393, "res": {"Yes": 0.9984818884988393, "No": 0.00151774590808129}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984828407514705, "res": {"Yes": 0.9984828407514705, "No": 0.0015169022759078748}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997078404369196, "res": {"Yes": 0.9997078404369196, "No": 0.0002916990318757343}, "ground_truth": 1}, {"key": "28765782", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941655591439198, "res": {"Yes": 0.9941655591439198, "No": 0.005834054883826728}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992485463827934, "res": {"Yes": 0.9992485463827934, "No": 0.0007512245121086634}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8841614917976415, "res": {"Yes": 0.8841614917976415, "No": 0.11583702878203912}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9381002118701264, "res": {"Yes": 0.9381002118701264, "No": 0.061899201451300144}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9590324743024731, "res": {"Yes": 0.9590324743024731, "No": 0.04096691865661429}, "ground_truth": 1}, {"key": "35828022", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9787601748218723, "res": {"Yes": 0.9787601748218723, "No": 0.021239230756532477}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5478884889817737, "res": {"Yes": 0.5478884889817737, "No": 0.4521099845947941}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8856669428976701, "res": {"Yes": 0.8856669428976701, "No": 0.1143320894488055}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982753780724708, "res": {"Yes": 0.9982753780724708, "No": 0.0017245398805365625}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975763895976717, "res": {"Yes": 0.9975763895976717, "No": 0.0024233824695921546}, "ground_truth": 1}, {"key": "27717735", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947426381561463, "res": {"Yes": 0.9947426381561463, "No": 0.005255153840554487}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9320654047392197, "res": {"Yes": 0.9320654047392197, "No": 0.0679337712915446}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09792883867944299, "res": {"No": 0.9020703036450606, "Yes": 0.09792883867944299}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935812581015869, "res": {"Yes": 0.9935812581015869, "No": 0.006416603828021601}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953519244933628, "res": {"Yes": 0.9953519244933628, "No": 0.004646929035643942}, "ground_truth": 1}, {"key": "37977826", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948267495894464, "res": {"Yes": 0.9948267495894464, "No": 0.005172204391509657}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9879996850595082, "res": {"Yes": 0.9879996850595082, "No": 0.011999604356852012}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9689499205556752, "res": {"Yes": 0.9689499205556752, "No": 0.03104956749522453}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994209211808954, "res": {"Yes": 0.9994209211808954, "No": 0.000578852130931684}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928680082275034, "res": {"Yes": 0.9928680082275034, "No": 0.00713133161301567}, "ground_truth": 1}, {"key": "31768588", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952807060278976, "res": {"Yes": 0.9952807060278976, "No": 0.004718687925096046}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925024277778549, "res": {"Yes": 0.9925024277778549, "No": 0.007497309920849488}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.98773165043312, "res": {"Yes": 0.98773165043312, "No": 0.012267583296610443}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997270194659681, "res": {"Yes": 0.9997270194659681, "No": 0.00027252369323209157}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974877874010846, "res": {"Yes": 0.9974877874010846, "No": 0.0025112315218764523}, "ground_truth": 1}, {"key": "37183351", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967324510955664, "res": {"Yes": 0.9967324510955664, "No": 0.003267144229352165}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987003722485646, "res": {"Yes": 0.9987003722485646, "No": 0.0012992839161630563}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9314273727895171, "res": {"Yes": 0.9314273727895171, "No": 0.06857021652286155}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005842306526159458, "res": {"No": 0.999415206418459, "Yes": 0.0005842306526159458}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9399044103556403, "res": {"Yes": 0.9399044103556403, "No": 0.06009432907776818}, "ground_truth": 1}, {"key": "39622090", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5748664649643975, "res": {"Yes": 0.5748664649643975, "No": 0.42513010393462525}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.030622672898259876, "res": {"No": 0.9693743844968321, "Yes": 0.030622672898259876}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5546046840607151, "res": {"Yes": 0.5546046840607151, "No": 0.4453943542161432}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9336012734404092, "res": {"Yes": 0.9336012734404092, "No": 0.06639771737321905}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942260025688529, "res": {"Yes": 0.9942260025688529, "No": 0.00577352538366634}, "ground_truth": 1}, {"key": "39272756", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9816856754165807, "res": {"Yes": 0.9816856754165807, "No": 0.01831344114393486}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971294740608972, "res": {"Yes": 0.9971294740608972, "No": 0.0028694353807809016}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.005010140101024545, "res": {"No": 0.994988769322808, "Yes": 0.005010140101024545}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9883889654332128, "res": {"Yes": 0.9883889654332128, "No": 0.011609459953352528}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944337930562239, "res": {"Yes": 0.9944337930562239, "No": 0.005565613821376872}, "ground_truth": 1}, {"key": "32138822", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9498050053329751, "res": {"Yes": 0.9498050053329751, "No": 0.05019439057810544}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9638547885522236, "res": {"Yes": 0.9638547885522236, "No": 0.03614501635669609}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9601621332291452, "res": {"Yes": 0.9601621332291452, "No": 0.039835540012887825}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996923522632732, "res": {"Yes": 0.9996923522632732, "No": 0.00030731139760815414}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998008899750327, "res": {"Yes": 0.9998008899750327, "No": 0.00019870862543757897}, "ground_truth": 1}, {"key": "31070114", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985220641245026, "res": {"Yes": 0.9985220641245026, "No": 0.0014774129906427133}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992698423948916, "res": {"Yes": 0.9992698423948916, "No": 0.0007293420730530445}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9384262771095698, "res": {"Yes": 0.9384262771095698, "No": 0.06157258846084289}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8929629574366379, "res": {"Yes": 0.8929629574366379, "No": 0.10703681909700583}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.966448155753615, "res": {"Yes": 0.966448155753615, "No": 0.0335499602622107}, "ground_truth": 1}, {"key": "39652762", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9807717178630031, "res": {"Yes": 0.9807717178630031, "No": 0.01922659888650966}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9367104524754161, "res": {"Yes": 0.9367104524754161, "No": 0.06326043403496816}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8533660935795634, "res": {"Yes": 0.8533660935795634, "No": 0.14663112494867075}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6067731081348166, "res": {"Yes": 0.6067731081348166, "No": 0.39322046687148116}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9907665703447288, "res": {"Yes": 0.9907665703447288, "No": 0.009231534434745252}, "ground_truth": 1}, {"key": "33258866", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9674325425224534, "res": {"Yes": 0.9674325425224534, "No": 0.03256576160603234}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9386903827290748, "res": {"Yes": 0.9386903827290748, "No": 0.06130885456869426}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.23338939434183464, "res": {"No": 0.7666041979404481, "Yes": 0.23338939434183464}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928870397038821, "res": {"Yes": 0.9928870397038821, "No": 0.007112339824958521}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964638116926476, "res": {"Yes": 0.9964638116926476, "No": 0.0035356645704656803}, "ground_truth": 1}, {"key": "36962388", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945443879005372, "res": {"Yes": 0.9945443879005372, "No": 0.0054538270812646145}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9878028309159866, "res": {"Yes": 0.9878028309159866, "No": 0.01219525667469847}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.926892959143625e-06, "res": {"No": 0.9999946806438478, "Yes": 4.926892959143625e-06}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9845036031810547, "res": {"Yes": 0.9845036031810547, "No": 0.015495370144989824}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9791859948219713, "res": {"Yes": 0.9791859948219713, "No": 0.020812781204612895}, "ground_truth": 1}, {"key": "32282272", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8953462894081184, "res": {"Yes": 0.8953462894081184, "No": 0.10464898804882465}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8340280617559965, "res": {"Yes": 0.8340280617559965, "No": 0.16596935767236606}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9631910831475198, "res": {"Yes": 0.9631910831475198, "No": 0.03680851774971647}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9856325817176875, "res": {"Yes": 0.9856325817176875, "No": 0.014367358733434312}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9930158632391125, "res": {"Yes": 0.9930158632391125, "No": 0.006983507369121923}, "ground_truth": 1}, {"key": "36093072", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912537320960656, "res": {"Yes": 0.9912537320960656, "No": 0.008746173399839765}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6342785295254979, "res": {"Yes": 0.6342785295254979, "No": 0.3657210846122598}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5004882785408018, "res": {"Yes": 0.5004882785408018, "No": 0.49950981072266304}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9602862258803689, "res": {"Yes": 0.9602862258803689, "No": 0.03971353171978815}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9590481546116141, "res": {"Yes": 0.9590481546116141, "No": 0.040950707158677074}, "ground_truth": 1}, {"key": "38879972", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22444840949776465, "res": {"No": 0.7755254572643114, "Yes": 0.22444840949776465}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.960440157694235, "res": {"Yes": 0.960440157694235, "No": 0.039559468800891284}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9926035478851042, "res": {"Yes": 0.9926035478851042, "No": 0.007396232722183223}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920038437071328, "res": {"Yes": 0.9920038437071328, "No": 0.007995236117392852}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9910686862634941, "res": {"Yes": 0.9910686862634941, "No": 0.00893049280755445}, "ground_truth": 1}, {"key": "32106473", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978545427904619, "res": {"Yes": 0.9978545427904619, "No": 0.0021450744898550093}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9762513480345013, "res": {"Yes": 0.9762513480345013, "No": 0.023747840000139415}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00022475581764196267, "res": {"No": 0.9997745552364907, "Yes": 0.00022475581764196267}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946943927166074, "res": {"Yes": 0.9946943927166074, "No": 0.005305214893542699}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978216607321461, "res": {"Yes": 0.9978216607321461, "No": 0.0021779439269604575}, "ground_truth": 1}, {"key": "40415815", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987836000149499, "res": {"Yes": 0.9987836000149499, "No": 0.0012160687003934154}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947874610755564, "res": {"Yes": 0.9947874610755564, "No": 0.005212126755919562}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8479265606055402, "res": {"Yes": 0.8479265606055402, "No": 0.15207184581097508}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986010997743918, "res": {"Yes": 0.9986010997743918, "No": 0.0013975480090513113}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996246525960478, "res": {"Yes": 0.996246525960478, "No": 0.0037524249299495183}, "ground_truth": 1}, {"key": "34581918", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911368424183654, "res": {"Yes": 0.9911368424183654, "No": 0.00886048832673199}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993852070626714, "res": {"Yes": 0.9993852070626714, "No": 0.0006142384808093262}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.996106001252644, "res": {"Yes": 0.996106001252644, "No": 0.003893935251231433}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971771260227905, "res": {"Yes": 0.9971771260227905, "No": 0.0028227688896342107}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992919873593172, "res": {"Yes": 0.9992919873593172, "No": 0.0007077143543415432}, "ground_truth": 1}, {"key": "33004157", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977105840195809, "res": {"Yes": 0.9977105840195809, "No": 0.0022892042749232057}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998433152563369, "res": {"Yes": 0.9998433152563369, "No": 0.0001563095846937749}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9953077477739232, "res": {"Yes": 0.9953077477739232, "No": 0.004691623473620403}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99814792441259, "res": {"Yes": 0.99814792441259, "No": 0.0018519091482165804}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9886721408820643, "res": {"Yes": 0.9886721408820643, "No": 0.011326826956064672}, "ground_truth": 1}, {"key": "30334943", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991591540519089, "res": {"Yes": 0.991591540519089, "No": 0.008407593444415645}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9745647869471367, "res": {"Yes": 0.9745647869471367, "No": 0.02543379697094438}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9902577917930847, "res": {"Yes": 0.9902577917930847, "No": 0.009740493453831144}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956490406692381, "res": {"Yes": 0.9956490406692381, "No": 0.0043500105873976016}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996009839231348, "res": {"Yes": 0.996009839231348, "No": 0.0039896490872726054}, "ground_truth": 1}, {"key": "33280503", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998169304870299, "res": {"Yes": 0.998169304870299, "No": 0.0018303660588025968}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9834066251923437, "res": {"Yes": 0.9834066251923437, "No": 0.01659209337165919}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7316652341678824, "res": {"Yes": 0.7316652341678824, "No": 0.26833418938020054}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.997540335244806, "res": {"Yes": 0.997540335244806, "No": 0.0024596441351754242}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996749577376861, "res": {"Yes": 0.9996749577376861, "No": 0.00032475808715949986}, "ground_truth": 1}, {"key": "25726782", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999428286693428, "res": {"Yes": 0.9999428286693428, "No": 5.70604115284132e-05}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991524955349488, "res": {"Yes": 0.9991524955349488, "No": 0.000847363748810059}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7059424926334239, "res": {"Yes": 0.7059424926334239, "No": 0.29405504537558275}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5675174052799892, "res": {"Yes": 0.5675174052799892, "No": 0.4324812959083673}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3689699406178653, "res": {"No": 0.6310288224924724, "Yes": 0.3689699406178653}, "ground_truth": 1}, {"key": "35479854", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890558932918152, "res": {"Yes": 0.9890558932918152, "No": 0.010942703410920344}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4310536364937491, "res": {"No": 0.56894415246386, "Yes": 0.4310536364937491}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9941584928390714, "res": {"Yes": 0.9941584928390714, "No": 0.005840318184376892}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.969046399826413, "res": {"Yes": 0.969046399826413, "No": 0.03095295833778141}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9820520538879615, "res": {"Yes": 0.9820520538879615, "No": 0.017946610714678422}, "ground_truth": 1}, {"key": "32716226", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963666286259815, "res": {"Yes": 0.9963666286259815, "No": 0.003632157335266624}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962549283391108, "res": {"Yes": 0.9962549283391108, "No": 0.0037440626015766277}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4254833764519204, "res": {"No": 0.5745151564827422, "Yes": 0.4254833764519204}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9323416781719646, "res": {"Yes": 0.9323416781719646, "No": 0.06765747738652433}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9751280590437508, "res": {"Yes": 0.9751280590437508, "No": 0.024871219186301634}, "ground_truth": 1}, {"key": "37047554", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9757318140642944, "res": {"Yes": 0.9757318140642944, "No": 0.024267033398166344}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922508322192394, "res": {"Yes": 0.9922508322192394, "No": 0.007748905278355244}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9731685954135338, "res": {"Yes": 0.9731685954135338, "No": 0.026830177468818665}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984388619457426, "res": {"Yes": 0.9984388619457426, "No": 0.0015611024634203247}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9761257037856934, "res": {"Yes": 0.9761257037856934, "No": 0.023873753581227737}, "ground_truth": 1}, {"key": "36565290", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940268973335199, "res": {"Yes": 0.9940268973335199, "No": 0.005972372660277362}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9789366514017942, "res": {"Yes": 0.9789366514017942, "No": 0.021062552042754333}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 3.0465916226224522e-05, "res": {"No": 0.9999688139202959, "Yes": 3.0465916226224522e-05}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9706026808115994, "res": {"Yes": 0.9706026808115994, "No": 0.02939700148926223}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987111911284269, "res": {"Yes": 0.9987111911284269, "No": 0.0012887532510475134}, "ground_truth": 1}, {"key": "27758640", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9757857452277905, "res": {"Yes": 0.9757857452277905, "No": 0.024213910425232875}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9566284188423284, "res": {"Yes": 0.9566284188423284, "No": 0.043370339203945445}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9957304744504115, "res": {"Yes": 0.9957304744504115, "No": 0.004267543116032113}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997539488355541, "res": {"Yes": 0.9997539488355541, "No": 0.0002459972994346735}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998508202684029, "res": {"Yes": 0.9998508202684029, "No": 0.00014902767962149662}, "ground_truth": 1}, {"key": "28897118", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999374648254056, "res": {"Yes": 0.9999374648254056, "No": 6.249968757715079e-05}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988425890173631, "res": {"Yes": 0.9988425890173631, "No": 0.001156613430794945}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.036256114414518e-06, "res": {"No": 0.9999939654258081, "Yes": 4.036256114414518e-06}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9730730952800899, "res": {"Yes": 0.9730730952800899, "No": 0.026925453850001037}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9462406806477325, "res": {"Yes": 0.9462406806477325, "No": 0.053758445432043986}, "ground_truth": 1}, {"key": "38452661", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941068905922807, "res": {"Yes": 0.9941068905922807, "No": 0.00589275882286899}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924326781507289, "res": {"Yes": 0.9924326781507289, "No": 0.007566225112816501}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.997983346184286, "res": {"Yes": 0.997983346184286, "No": 0.002015709170186089}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956836702437774, "res": {"Yes": 0.9956836702437774, "No": 0.004315783046979238}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995695426233722, "res": {"Yes": 0.9995695426233722, "No": 0.0004300056112255774}, "ground_truth": 1}, {"key": "38033492", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996099177718637, "res": {"Yes": 0.9996099177718637, "No": 0.00038932721432449844}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959856026228081, "res": {"Yes": 0.9959856026228081, "No": 0.004013654925665947}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5446399744221717, "res": {"Yes": 0.5446399744221717, "No": 0.4553589363170069}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.21357192880171896, "res": {"No": 0.7864252675552383, "Yes": 0.21357192880171896}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7554924731944823, "res": {"Yes": 0.7554924731944823, "No": 0.24450647201708992}, "ground_truth": 1}, {"key": "35949555", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7749266649322294, "res": {"Yes": 0.7749266649322294, "No": 0.22507133107953695}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9371948897889429, "res": {"Yes": 0.9371948897889429, "No": 0.06280252495665964}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0006851395622112068, "res": {"No": 0.9993142482352896, "Yes": 0.0006851395622112068}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.728613569869796, "res": {"Yes": 0.728613569869796, "No": 0.2713857604447501}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9759312828145849, "res": {"Yes": 0.9759312828145849, "No": 0.02406795719808525}, "ground_truth": 1}, {"key": "15263826", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9431211855009488, "res": {"Yes": 0.9431211855009488, "No": 0.05687850939308107}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9380994895332413, "res": {"Yes": 0.9380994895332413, "No": 0.06189963474722684}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03569847529257682, "res": {"No": 0.9643005366001186, "Yes": 0.03569847529257682}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.03815202281437686, "res": {"No": 0.9618463455828564, "Yes": 0.03815202281437686}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.015108168819728351, "res": {"No": 0.98489117612983, "Yes": 0.015108168819728351}, "ground_truth": 1}, {"key": "37313866", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5255008873942757, "res": {"Yes": 0.5255008873942757, "No": 0.47449700635675085}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7241852131823235, "res": {"Yes": 0.7241852131823235, "No": 0.27581291149942033}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9984671519135484, "res": {"Yes": 0.9984671519135484, "No": 0.00153241294138378}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998561040097047, "res": {"Yes": 0.998561040097047, "No": 0.001438095658486111}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968366768830512, "res": {"Yes": 0.9968366768830512, "No": 0.003162985243559654}, "ground_truth": 1}, {"key": "13911157", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9873194118562322, "res": {"Yes": 0.9873194118562322, "No": 0.012680274271024587}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988373610889336, "res": {"Yes": 0.9988373610889336, "No": 0.0011623662277818292}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6430579078081222, "res": {"Yes": 0.6430579078081222, "No": 0.3569366728186674}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.977852323319091, "res": {"Yes": 0.977852323319091, "No": 0.02214678366571516}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879651204951506, "res": {"Yes": 0.9879651204951506, "No": 0.012033702790152213}, "ground_truth": 1}, {"key": "39594894", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969775475339567, "res": {"Yes": 0.9969775475339567, "No": 0.003020860060182206}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941128937236865, "res": {"Yes": 0.9941128937236865, "No": 0.005886660029303238}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9829482374036755, "res": {"Yes": 0.9829482374036755, "No": 0.017050470164539443}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904309495444775, "res": {"Yes": 0.9904309495444775, "No": 0.009568354804990659}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986039109360798, "res": {"Yes": 0.986039109360798, "No": 0.013960218917521323}, "ground_truth": 1}, {"key": "34096170", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957112082478528, "res": {"Yes": 0.9957112082478528, "No": 0.004288200919863106}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9891237666140996, "res": {"Yes": 0.9891237666140996, "No": 0.010874790136355336}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9886889242284443, "res": {"Yes": 0.9886889242284443, "No": 0.011310271317194242}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953199173766225, "res": {"Yes": 0.9953199173766225, "No": 0.004679902512181467}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889605093849193, "res": {"Yes": 0.9889605093849193, "No": 0.011038534406280021}, "ground_truth": 1}, {"key": "37891952", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991554694768116, "res": {"Yes": 0.9991554694768116, "No": 0.0008442077030358662}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9903757523849358, "res": {"Yes": 0.9903757523849358, "No": 0.00962368049148494}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2164484210233875, "res": {"No": 0.7835449484795508, "Yes": 0.2164484210233875}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987280743839463, "res": {"Yes": 0.9987280743839463, "No": 0.0012711490802331952}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956414740639757, "res": {"Yes": 0.9956414740639757, "No": 0.004357553948668161}, "ground_truth": 1}, {"key": "40186158", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988578147906455, "res": {"Yes": 0.9988578147906455, "No": 0.0011412300580346108}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950128413772906, "res": {"Yes": 0.9950128413772906, "No": 0.004985933656130914}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4056527665996565, "res": {"No": 0.5943446553646443, "Yes": 0.4056527665996565}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9313981171152006, "res": {"Yes": 0.9313981171152006, "No": 0.0686002362759773}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.976908719187184, "res": {"Yes": 0.976908719187184, "No": 0.02309075403047887}, "ground_truth": 1}, {"key": "37049719", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9177060891039356, "res": {"Yes": 0.9177060891039356, "No": 0.08229304358192961}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9524451966138244, "res": {"Yes": 0.9524451966138244, "No": 0.04755386549617053}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8389808227891993, "res": {"Yes": 0.8389808227891993, "No": 0.1610165861079115}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973757106834918, "res": {"Yes": 0.9973757106834918, "No": 0.0026237439308257407}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982933211355416, "res": {"Yes": 0.9982933211355416, "No": 0.0017062750804059798}, "ground_truth": 1}, {"key": "34610504", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982316645507795, "res": {"Yes": 0.9982316645507795, "No": 0.001767663607713757}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9551046102229009, "res": {"Yes": 0.9551046102229009, "No": 0.04489119975462498}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8296379733756868, "res": {"Yes": 0.8296379733756868, "No": 0.1703591880537598}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959289582496686, "res": {"Yes": 0.9959289582496686, "No": 0.004070910477501261}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975493541475013, "res": {"Yes": 0.9975493541475013, "No": 0.002449946723921741}, "ground_truth": 1}, {"key": "37595429", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985667498853986, "res": {"Yes": 0.9985667498853986, "No": 0.0014320547184007554}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994030621083583, "res": {"Yes": 0.9994030621083583, "No": 0.000596317091297271}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9923587185712284, "res": {"Yes": 0.9923587185712284, "No": 0.007638481134138976}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9523355545767359, "res": {"Yes": 0.9523355545767359, "No": 0.04765970485738191}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7116992418099618, "res": {"Yes": 0.7116992418099618, "No": 0.28828508787725515}, "ground_truth": 1}, {"key": "29772670", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7096670660364982, "res": {"Yes": 0.7096670660364982, "No": 0.29031167260299995}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7347018046869002, "res": {"Yes": 0.7347018046869002, "No": 0.265270425431088}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9741320154966228, "res": {"Yes": 0.9741320154966228, "No": 0.025867510980372422}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949532408981613, "res": {"Yes": 0.9949532408981613, "No": 0.005046462884569054}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967967683287503, "res": {"Yes": 0.9967967683287503, "No": 0.003202864863012283}, "ground_truth": 1}, {"key": "36369872", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993836348217872, "res": {"Yes": 0.993836348217872, "No": 0.006163077697443284}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9887355429989628, "res": {"Yes": 0.9887355429989628, "No": 0.011263740700821095}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0026528739956441817, "res": {"No": 0.997346418288766, "Yes": 0.0026528739956441817}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914681248009772, "res": {"Yes": 0.9914681248009772, "No": 0.008530434337149605}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6788539434075531, "res": {"Yes": 0.6788539434075531, "No": 0.3211453354195134}, "ground_truth": 1}, {"key": "34527433", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854718736295902, "res": {"Yes": 0.9854718736295902, "No": 0.014527639087168915}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9102752069251949, "res": {"Yes": 0.9102752069251949, "No": 0.08972398090461361}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7086931774955318, "res": {"Yes": 0.7086931774955318, "No": 0.2913012278369664}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8262244178917102, "res": {"Yes": 0.8262244178917102, "No": 0.17377370016709462}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9011152158698114, "res": {"Yes": 0.9011152158698114, "No": 0.09888016663991055}, "ground_truth": 1}, {"key": "31111734", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6622838490121579, "res": {"Yes": 0.6622838490121579, "No": 0.3377148427783302}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.45948990819797125, "res": {"No": 0.5405075626764114, "Yes": 0.45948990819797125}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1550917502642334, "res": {"No": 0.8449068199110628, "Yes": 0.1550917502642334}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955074866638872, "res": {"Yes": 0.9955074866638872, "No": 0.004491756729327457}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9906907389828092, "res": {"Yes": 0.9906907389828092, "No": 0.009309084757413875}, "ground_truth": 1}, {"key": "40303872", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996119353246581, "res": {"Yes": 0.996119353246581, "No": 0.0038804486522834723}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917566106186309, "res": {"Yes": 0.9917566106186309, "No": 0.008242892212360315}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0021857243392342106, "res": {"No": 0.9978135869906245, "Yes": 0.0021857243392342106}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995752546298654, "res": {"Yes": 0.9995752546298654, "No": 0.00042437782619893694}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9711863252864809, "res": {"Yes": 0.9711863252864809, "No": 0.028813080814719665}, "ground_truth": 1}, {"key": "33653553", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9650961068610893, "res": {"Yes": 0.9650961068610893, "No": 0.03490294742155686}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9095499656862372, "res": {"Yes": 0.9095499656862372, "No": 0.09044876691629357}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9451608852852562, "res": {"Yes": 0.9451608852852562, "No": 0.05483812035216853}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995567971134283, "res": {"Yes": 0.9995567971134283, "No": 0.00044194960421508857}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963193041285048, "res": {"Yes": 0.9963193041285048, "No": 0.003680325793049271}, "ground_truth": 1}, {"key": "34404510", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9915296482983668, "res": {"Yes": 0.9915296482983668, "No": 0.008469227319841155}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957492595808721, "res": {"Yes": 0.9957492595808721, "No": 0.004250646032290634}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2489942936122282, "res": {"No": 0.7508407727528106, "Yes": 0.2489942936122282}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8741559908663602, "res": {"Yes": 0.8741559908663602, "No": 0.1258313450516554}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9716439375753024, "res": {"Yes": 0.9716439375753024, "No": 0.02835031040041671}, "ground_truth": 1}, {"key": "35568692", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9500172468247253, "res": {"Yes": 0.9500172468247253, "No": 0.049976494204655976}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9538671910125053, "res": {"Yes": 0.9538671910125053, "No": 0.04612106001495001}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8893960732236567, "res": {"Yes": 0.8893960732236567, "No": 0.11059675834977241}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5149176675158799, "res": {"Yes": 0.5149176675158799, "No": 0.48508038961328176}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8149612279014229, "res": {"Yes": 0.8149612279014229, "No": 0.1850275955301446}, "ground_truth": 1}, {"key": "39151664", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17685667857112675, "res": {"No": 0.8231415338536824, "Yes": 0.17685667857112675}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.25727071055657025, "res": {"No": 0.7427100668519488, "Yes": 0.25727071055657025}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9716161431263701, "res": {"Yes": 0.9716161431263701, "No": 0.02837471066672383}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957776307733738, "res": {"Yes": 0.9957776307733738, "No": 0.004220422658106844}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904433478367046, "res": {"Yes": 0.9904433478367046, "No": 0.009552245702874228}, "ground_truth": 1}, {"key": "37493670", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949455684453221, "res": {"Yes": 0.9949455684453221, "No": 0.005051971580023084}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9873926312166877, "res": {"Yes": 0.9873926312166877, "No": 0.012600535070672093}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9934360626728308, "res": {"Yes": 0.9934360626728308, "No": 0.00656272412018704}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946627768577927, "res": {"Yes": 0.9946627768577927, "No": 0.005336362702214357}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992738923438429, "res": {"Yes": 0.9992738923438429, "No": 0.0007257898847303812}, "ground_truth": 1}, {"key": "21935983", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9919532835517341, "res": {"Yes": 0.9919532835517341, "No": 0.008045526952993664}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.993584908525835, "res": {"Yes": 0.993584908525835, "No": 0.006413199439804387}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9913030313438135, "res": {"Yes": 0.9913030313438135, "No": 0.0086962707034168}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906652389313749, "res": {"Yes": 0.9906652389313749, "No": 0.009334164573447256}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923135185541382, "res": {"Yes": 0.9923135185541382, "No": 0.007685655573765752}, "ground_truth": 1}, {"key": "38174214", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9133353958872217, "res": {"Yes": 0.9133353958872217, "No": 0.08666303858428473}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9702146105177354, "res": {"Yes": 0.9702146105177354, "No": 0.029782598469767278}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7372326241709213, "res": {"Yes": 0.7372326241709213, "No": 0.2627661252586811}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6794432813704584, "res": {"Yes": 0.6794432813704584, "No": 0.32055352880976395}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9558699793922998, "res": {"Yes": 0.9558699793922998, "No": 0.04412834320117294}, "ground_truth": 1}, {"key": "40319923", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.944908457075738, "res": {"Yes": 0.944908457075738, "No": 0.055086560700801084}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.795945645513522, "res": {"Yes": 0.795945645513522, "No": 0.20405190116929897}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9418528507524834, "res": {"Yes": 0.9418528507524834, "No": 0.05813662122530246}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9869074055321256, "res": {"Yes": 0.9869074055321256, "No": 0.013091305125758524}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998494962706284, "res": {"Yes": 0.998494962706284, "No": 0.0015037098363894852}, "ground_truth": 1}, {"key": "36478264", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985674640605935, "res": {"Yes": 0.9985674640605935, "No": 0.0014316791397074177}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924041464191031, "res": {"Yes": 0.9924041464191031, "No": 0.007592551333402914}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6085180767069438, "res": {"Yes": 0.6085180767069438, "No": 0.3914720336292468}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984085640792769, "res": {"Yes": 0.9984085640792769, "No": 0.0015899272794355265}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977993541793476, "res": {"Yes": 0.9977993541793476, "No": 0.002200115323981843}, "ground_truth": 1}, {"key": "11935769", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998960227438491, "res": {"Yes": 0.998960227438491, "No": 0.0010387169805264754}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960454423389669, "res": {"Yes": 0.9960454423389669, "No": 0.003952330002206681}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.2700654886030848e-05, "res": {"No": 0.9999864556687252, "Yes": 1.2700654886030848e-05}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957550543605496, "res": {"Yes": 0.9957550543605496, "No": 0.004243380668324099}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875404811277342, "res": {"Yes": 0.9875404811277342, "No": 0.012457026970408102}, "ground_truth": 1}, {"key": "33373410", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701221660825893, "res": {"Yes": 0.9701221660825893, "No": 0.02987499607832282}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9269245947839133, "res": {"Yes": 0.9269245947839133, "No": 0.07307190665346552}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5708456049627364, "res": {"Yes": 0.5708456049627364, "No": 0.4291531031167243}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.35892389665489643, "res": {"No": 0.6410750475843023, "Yes": 0.35892389665489643}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9125608142434254, "res": {"Yes": 0.9125608142434254, "No": 0.0874389450527193}, "ground_truth": 1}, {"key": "11130680", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9196781887210227, "res": {"Yes": 0.9196781887210227, "No": 0.08032074882996053}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.955592435051673, "res": {"Yes": 0.955592435051673, "No": 0.04440678526216494}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9642512052108497, "res": {"Yes": 0.9642512052108497, "No": 0.03574801985669653}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981485193089302, "res": {"Yes": 0.9981485193089302, "No": 0.0018512818276712644}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976443728544455, "res": {"Yes": 0.9976443728544455, "No": 0.002355192061646248}, "ground_truth": 1}, {"key": "34868650", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991763017258258, "res": {"Yes": 0.9991763017258258, "No": 0.0008236251883880067}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992365269339868, "res": {"Yes": 0.9992365269339868, "No": 0.0007633277992509227}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6454421326768857, "res": {"Yes": 0.6454421326768857, "No": 0.35455521165105575}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969437840652676, "res": {"Yes": 0.9969437840652676, "No": 0.003055922707641005}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957615531771191, "res": {"Yes": 0.9957615531771191, "No": 0.0042375826586488995}, "ground_truth": 1}, {"key": "33960561", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969453250424657, "res": {"Yes": 0.9969453250424657, "No": 0.0030540622172888655}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9895437061308713, "res": {"Yes": 0.9895437061308713, "No": 0.010455480377527259}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6786275697925103, "res": {"Yes": 0.6786275697925103, "No": 0.32136783772247807}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978077699539311, "res": {"Yes": 0.9978077699539311, "No": 0.002191640344001641}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973094283934215, "res": {"Yes": 0.9973094283934215, "No": 0.0026885023167205436}, "ground_truth": 1}, {"key": "22504858", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812528749543771, "res": {"Yes": 0.9812528749543771, "No": 0.01874603904154458}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9749553411324905, "res": {"Yes": 0.9749553411324905, "No": 0.02504072448654664}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.08295261813789152, "res": {"No": 0.9170436655770391, "Yes": 0.08295261813789152}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963106635868073, "res": {"Yes": 0.9963106635868073, "No": 0.003688826105994754}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5766006537366704, "res": {"Yes": 0.5766006537366704, "No": 0.42339774340314656}, "ground_truth": 1}, {"key": "32283530", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9840849542860864, "res": {"Yes": 0.9840849542860864, "No": 0.01591401677379124}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4347085554094079, "res": {"No": 0.5652888158202324, "Yes": 0.4347085554094079}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.004808973149920873, "res": {"No": 0.9951906143102937, "Yes": 0.004808973149920873}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991615363572497, "res": {"Yes": 0.9991615363572497, "No": 0.0008382375710391293}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984178289543753, "res": {"Yes": 0.9984178289543753, "No": 0.0015820465267293827}, "ground_truth": 1}, {"key": "38377099", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944978152729753, "res": {"Yes": 0.9944978152729753, "No": 0.005502051259510388}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955020571808612, "res": {"Yes": 0.9955020571808612, "No": 0.004497751381922859}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9667292619797124, "res": {"Yes": 0.9667292619797124, "No": 0.03326935420069262}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.029171723855075327, "res": {"No": 0.970826216933898, "Yes": 0.029171723855075327}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9778999722901357, "res": {"Yes": 0.9778999722901357, "No": 0.0220984943884202}, "ground_truth": 1}, {"key": "36105123", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.921924081428135, "res": {"Yes": 0.921924081428135, "No": 0.07807477950191094}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.04862542678561444, "res": {"No": 0.9513720187708159, "Yes": 0.04862542678561444}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7664894075569509, "res": {"Yes": 0.7664894075569509, "No": 0.23350936397514394}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976323908184244, "res": {"Yes": 0.9976323908184244, "No": 0.0023675223325056462}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982250119578421, "res": {"Yes": 0.9982250119578421, "No": 0.0017746176281885197}, "ground_truth": 1}, {"key": "33527826", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960436686824268, "res": {"Yes": 0.9960436686824268, "No": 0.003954555790498537}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951358412228655, "res": {"Yes": 0.9951358412228655, "No": 0.004863089711314128}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0019837961943856425, "res": {"No": 0.9980155234824611, "Yes": 0.0019837961943856425}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.973050856921689, "res": {"Yes": 0.973050856921689, "No": 0.026947342365899112}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8620996601311578, "res": {"Yes": 0.8620996601311578, "No": 0.13789660689956446}, "ground_truth": 1}, {"key": "32349891", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9711763201764941, "res": {"Yes": 0.9711763201764941, "No": 0.028821800972267494}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9851435562930025, "res": {"Yes": 0.9851435562930025, "No": 0.014855234266200886}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9636123131921034, "res": {"Yes": 0.9636123131921034, "No": 0.03638587605885311}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999397295583361, "res": {"Yes": 0.9999397295583361, "No": 6.0179608034636164e-05}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999023025627055, "res": {"Yes": 0.9999023025627055, "No": 9.756282778017196e-05}, "ground_truth": 1}, {"key": "34281974", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996675695272583, "res": {"Yes": 0.9996675695272583, "No": 0.00033220742833585496}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999172016779703, "res": {"Yes": 0.9999172016779703, "No": 8.271893682356011e-05}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8483099524568762, "res": {"Yes": 0.8483099524568762, "No": 0.15168799988626958}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9884224921555302, "res": {"Yes": 0.9884224921555302, "No": 0.011576910947263193}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9865480679094876, "res": {"Yes": 0.9865480679094876, "No": 0.013451312419993476}, "ground_truth": 1}, {"key": "29387866", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9919528143579419, "res": {"Yes": 0.9919528143579419, "No": 0.008046167402428667}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994710561401526, "res": {"Yes": 0.9994710561401526, "No": 0.0005287507548460408}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8996275721702461, "res": {"Yes": 0.8996275721702461, "No": 0.10036601751348144}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9303021808189088, "res": {"Yes": 0.9303021808189088, "No": 0.06968594719357443}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2744752736136529, "res": {"No": 0.7254974220548827, "Yes": 0.2744752736136529}, "ground_truth": 1}, {"key": "35731925", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8552924514936966, "res": {"Yes": 0.8552924514936966, "No": 0.14469637463679424}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4336383140929352, "res": {"No": 0.5663460453817188, "Yes": 0.4336383140929352}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3176410840929164, "res": {"No": 0.6823560021803485, "Yes": 0.3176410840929164}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961838316487586, "res": {"Yes": 0.9961838316487586, "No": 0.003816049323630268}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998910985741908, "res": {"Yes": 0.9998910985741908, "No": 0.00010851913945864488}, "ground_truth": 1}, {"key": "38829733", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995063040579669, "res": {"Yes": 0.9995063040579669, "No": 0.0004932321511155593}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980963749968089, "res": {"Yes": 0.9980963749968089, "No": 0.0019032277849730364}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9038629500590284, "res": {"Yes": 0.9038629500590284, "No": 0.09613641563269851}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932133808262936, "res": {"Yes": 0.9932133808262936, "No": 0.006786387577307661}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925144082477177, "res": {"Yes": 0.9925144082477177, "No": 0.007484717529742526}, "ground_truth": 1}, {"key": "24624736", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974685687895322, "res": {"Yes": 0.9974685687895322, "No": 0.002531138696067928}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.993898052661765, "res": {"Yes": 0.993898052661765, "No": 0.006101607609397023}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8526156748975541, "res": {"Yes": 0.8526156748975541, "No": 0.14738166446654608}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.12152598144601442, "res": {"No": 0.8784702996074223, "Yes": 0.12152598144601442}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19256302042497805, "res": {"No": 0.8074304965287376, "Yes": 0.19256302042497805}, "ground_truth": 1}, {"key": "36928562", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23507029074714642, "res": {"No": 0.7649163940129698, "Yes": 0.23507029074714642}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.148113560317378, "res": {"No": 0.8518763515274574, "Yes": 0.148113560317378}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5366898885595643, "res": {"Yes": 0.5366898885595643, "No": 0.4633090745419015}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940777846621386, "res": {"Yes": 0.9940777846621386, "No": 0.005921248245689687}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834951682268673, "res": {"Yes": 0.9834951682268673, "No": 0.016504642071119666}, "ground_truth": 1}, {"key": "34941119", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993428294728596, "res": {"Yes": 0.993428294728596, "No": 0.006571415635901921}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9886198296457043, "res": {"Yes": 0.9886198296457043, "No": 0.011378109373299269}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9960796251133897, "res": {"Yes": 0.9960796251133897, "No": 0.0039195673545817005}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985092239122817, "res": {"Yes": 0.9985092239122817, "No": 0.00149039066815326}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998298474475878, "res": {"Yes": 0.9998298474475878, "No": 0.00016946256686292826}, "ground_truth": 1}, {"key": "30206231", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9670011831531147, "res": {"Yes": 0.9670011831531147, "No": 0.032996948219136985}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9722937414869555, "res": {"Yes": 0.9722937414869555, "No": 0.027704802648646446}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9077659844269843, "res": {"Yes": 0.9077659844269843, "No": 0.09223105343258983}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.977623946179829, "res": {"Yes": 0.977623946179829, "No": 0.022374939919205413}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903625349183448, "res": {"Yes": 0.9903625349183448, "No": 0.00963596910924509}, "ground_truth": 1}, {"key": "35584972", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9863673449123478, "res": {"Yes": 0.9863673449123478, "No": 0.013630187185805814}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.991638195998632, "res": {"Yes": 0.991638195998632, "No": 0.008359313269970175}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8956216919253414, "res": {"Yes": 0.8956216919253414, "No": 0.10437494737529864}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9552680521556319, "res": {"Yes": 0.9552680521556319, "No": 0.04473015483483501}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30350011183448483, "res": {"No": 0.6964965772418358, "Yes": 0.30350011183448483}, "ground_truth": 1}, {"key": "39277709", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9570165959873275, "res": {"Yes": 0.9570165959873275, "No": 0.042982681509904704}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.935798110200618, "res": {"Yes": 0.935798110200618, "No": 0.06420042810540277}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5390291111375336, "res": {"Yes": 0.5390291111375336, "No": 0.46096808883383245}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891326361266812, "res": {"Yes": 0.9891326361266812, "No": 0.01086643063737979}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9430407161757864, "res": {"Yes": 0.9430407161757864, "No": 0.05695899249350509}, "ground_truth": 1}, {"key": "36123657", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902209677612731, "res": {"Yes": 0.9902209677612731, "No": 0.009778550661870604}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8969500536708324, "res": {"Yes": 0.8969500536708324, "No": 0.10304900780286279}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 5.896349702620846e-05, "res": {"No": 0.9999391335724361, "Yes": 5.896349702620846e-05}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.0008540353257366562, "res": {"No": 0.9991444112249829, "Yes": 0.0008540353257366562}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.005164159084622702, "res": {"No": 0.994833950469969, "Yes": 0.005164159084622702}, "ground_truth": 1}, {"key": "33363938", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0027840154136864097, "res": {"No": 0.997215760205142, "Yes": 0.0027840154136864097}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.001285859387660285, "res": {"No": 0.9987132113234675, "Yes": 0.001285859387660285}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.006625922973889682, "res": {"No": 0.9933692319071207, "Yes": 0.006625922973889682}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7795618327697866, "res": {"Yes": 0.7795618327697866, "No": 0.22043392254572627}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1878756821582665, "res": {"No": 0.8121196804211758, "Yes": 0.1878756821582665}, "ground_truth": 1}, {"key": "37349129", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.11755881705038954, "res": {"No": 0.8824255820670295, "Yes": 0.11755881705038954}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.08846894433685931, "res": {"No": 0.9115237948935866, "Yes": 0.08846894433685931}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9605111925541835, "res": {"Yes": 0.9605111925541835, "No": 0.03948685686655712}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984142730956096, "res": {"Yes": 0.9984142730956096, "No": 0.001584565566117877}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9215753650769933, "res": {"Yes": 0.9215753650769933, "No": 0.07842246886655214}, "ground_truth": 1}, {"key": "37160199", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992756791071162, "res": {"Yes": 0.9992756791071162, "No": 0.0007234606512744551}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9782111788130392, "res": {"Yes": 0.9782111788130392, "No": 0.02178723567861641}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.06930852946895116, "res": {"No": 0.9306903745553667, "Yes": 0.06930852946895116}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9825923595753862, "res": {"Yes": 0.9825923595753862, "No": 0.017406475270613973}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973352721072143, "res": {"Yes": 0.9973352721072143, "No": 0.0026642838011897195}, "ground_truth": 1}, {"key": "35891053", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9820499847064638, "res": {"Yes": 0.9820499847064638, "No": 0.01794972113182514}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990431414214058, "res": {"Yes": 0.9990431414214058, "No": 0.0009568078216499029}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9446017092742521, "res": {"Yes": 0.9446017092742521, "No": 0.055397378823691674}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9185824889781878, "res": {"Yes": 0.9185824889781878, "No": 0.08141539468182178}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9840147728338969, "res": {"Yes": 0.9840147728338969, "No": 0.015983276853460564}, "ground_truth": 1}, {"key": "40694542", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.973366214439623, "res": {"Yes": 0.973366214439623, "No": 0.0266334908667171}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922135248846695, "res": {"Yes": 0.9922135248846695, "No": 0.007784964762804474}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.994728248711116, "res": {"Yes": 0.994728248711116, "No": 0.005271441101581572}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975551624952806, "res": {"Yes": 0.9975551624952806, "No": 0.0024446028568128387}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912804368313194, "res": {"Yes": 0.9912804368313194, "No": 0.008719160662451604}, "ground_truth": 1}, {"key": "24645770", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986958521409087, "res": {"Yes": 0.9986958521409087, "No": 0.0013039298668689448}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9677999725366613, "res": {"Yes": 0.9677999725366613, "No": 0.032198835232170174}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9781685121283662, "res": {"Yes": 0.9781685121283662, "No": 0.021830543757586855}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992054610133957, "res": {"Yes": 0.9992054610133957, "No": 0.000794186132807015}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950146129992319, "res": {"Yes": 0.9950146129992319, "No": 0.004985011343721058}, "ground_truth": 1}, {"key": "37974587", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955766067499454, "res": {"Yes": 0.9955766067499454, "No": 0.004421651544860228}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.03009873905395439, "res": {"No": 0.9698999637089032, "Yes": 0.03009873905395439}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3613984824209831, "res": {"No": 0.6386004589629144, "Yes": 0.3613984824209831}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.1574634360077058, "res": {"No": 0.842536073888888, "Yes": 0.1574634360077058}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3709228460986666, "res": {"No": 0.6290768196057844, "Yes": 0.3709228460986666}, "ground_truth": 1}, {"key": "40354149", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.20929276871159022, "res": {"No": 0.790706712996582, "Yes": 0.20929276871159022}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.04528237477582098, "res": {"No": 0.9547162242887965, "Yes": 0.04528237477582098}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9924216416660118, "res": {"Yes": 0.9924216416660118, "No": 0.007577783775355895}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998277043895984, "res": {"Yes": 0.998277043895984, "No": 0.0017229257772335385}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990336216446105, "res": {"Yes": 0.9990336216446105, "No": 0.0009661253212725932}, "ground_truth": 1}, {"key": "35519470", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99549732855732, "res": {"Yes": 0.99549732855732, "No": 0.004500973825281816}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983207557110936, "res": {"Yes": 0.9983207557110936, "No": 0.0016775749676712862}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.032728058856616664, "res": {"No": 0.9672706852507955, "Yes": 0.032728058856616664}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9519810574740727, "res": {"Yes": 0.9519810574740727, "No": 0.04801746404380202}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9757579289000332, "res": {"Yes": 0.9757579289000332, "No": 0.02424113246546494}, "ground_truth": 1}, {"key": "36185624", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962899608648401, "res": {"Yes": 0.9962899608648401, "No": 0.003709887968457514}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941826298090434, "res": {"Yes": 0.9941826298090434, "No": 0.005816177616087417}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04914177457027192, "res": {"No": 0.9508496458881716, "Yes": 0.04914177457027192}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9105345721724243, "res": {"Yes": 0.9105345721724243, "No": 0.08946276265489786}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8636343895316031, "res": {"Yes": 0.8636343895316031, "No": 0.13636248609897045}, "ground_truth": 1}, {"key": "39306113", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884771109262, "res": {"Yes": 0.9884771109262, "No": 0.011521592447244244}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9652825669625608, "res": {"Yes": 0.9652825669625608, "No": 0.03471368810247935}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02605744904072071, "res": {"No": 0.9739418883543953, "Yes": 0.02605744904072071}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992946080059903, "res": {"Yes": 0.9992946080059903, "No": 0.0007052609371597565}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996279070137725, "res": {"Yes": 0.9996279070137725, "No": 0.00037190095096188144}, "ground_truth": 1}, {"key": "19347718", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41176742147991463, "res": {"No": 0.5882319888693862, "Yes": 0.41176742147991463}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9770922702235855, "res": {"Yes": 0.9770922702235855, "No": 0.02290741320650821}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9991306065416778, "res": {"Yes": 0.9991306065416778, "No": 0.0008690258380994449}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996499370563354, "res": {"Yes": 0.9996499370563354, "No": 0.00034963633028592286}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998509394606263, "res": {"Yes": 0.9998509394606263, "No": 0.0001484257091477774}, "ground_truth": 1}, {"key": "21870064", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977731269689012, "res": {"Yes": 0.9977731269689012, "No": 0.00222662787869761}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997165361334757, "res": {"Yes": 0.9997165361334757, "No": 0.00028299452692377724}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9814262295671726, "res": {"Yes": 0.9814262295671726, "No": 0.018572668767928607}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9454031156871193, "res": {"Yes": 0.9454031156871193, "No": 0.054594998663301905}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926593451876464, "res": {"Yes": 0.9926593451876464, "No": 0.007339955492985872}, "ground_truth": 1}, {"key": "37675776", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982107514171845, "res": {"Yes": 0.9982107514171845, "No": 0.0017885309400034314}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9884386846254288, "res": {"Yes": 0.9884386846254288, "No": 0.011559623332066943}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9416106571230723, "res": {"Yes": 0.9416106571230723, "No": 0.05838900368163614}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996169480831258, "res": {"Yes": 0.9996169480831258, "No": 0.00038273439308468603}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964454628773117, "res": {"Yes": 0.9964454628773117, "No": 0.003554273488155572}, "ground_truth": 1}, {"key": "38107017", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977701571016087, "res": {"Yes": 0.9977701571016087, "No": 0.0022295868048126015}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968362198335396, "res": {"Yes": 0.9968362198335396, "No": 0.003163418602222411}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9898524266050025, "res": {"Yes": 0.9898524266050025, "No": 0.010145084261720644}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957108520820173, "res": {"Yes": 0.9957108520820173, "No": 0.004286548565657699}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983003202943813, "res": {"Yes": 0.9983003202943813, "No": 0.0016962404108123795}, "ground_truth": 1}, {"key": "40046472", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973952802845595, "res": {"Yes": 0.9973952802845595, "No": 0.0025996059581446583}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9879546436745811, "res": {"Yes": 0.9879546436745811, "No": 0.012038445003206158}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6900373654713611, "res": {"Yes": 0.6900373654713611, "No": 0.30996056477441125}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9905508316282915, "res": {"Yes": 0.9905508316282915, "No": 0.009448429894602737}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894768162267514, "res": {"Yes": 0.9894768162267514, "No": 0.01052074770056532}, "ground_truth": 1}, {"key": "32157820", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9847341713848361, "res": {"Yes": 0.9847341713848361, "No": 0.015265171774886661}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9750389715135228, "res": {"Yes": 0.9750389715135228, "No": 0.024960375288424836}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.010515880876630815, "res": {"No": 0.9894831241615614, "Yes": 0.010515880876630815}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993963944131913, "res": {"Yes": 0.9993963944131913, "No": 0.0006020708671894836}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970917827805921, "res": {"Yes": 0.9970917827805921, "No": 0.002908015759398694}, "ground_truth": 1}, {"key": "41004037", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911025278726298, "res": {"Yes": 0.9911025278726298, "No": 0.008896758340472478}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990668222813938, "res": {"Yes": 0.9990668222813938, "No": 0.0009326747728874496}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9641587803771312, "res": {"Yes": 0.9641587803771312, "No": 0.035840716480136106}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985701908524907, "res": {"Yes": 0.9985701908524907, "No": 0.0014296633242750377}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994086577118094, "res": {"Yes": 0.9994086577118094, "No": 0.0005909849536614184}, "ground_truth": 1}, {"key": "21387993", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967945215513666, "res": {"Yes": 0.9967945215513666, "No": 0.0032051919487411746}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986598214424457, "res": {"Yes": 0.9986598214424457, "No": 0.0013400638726481179}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.13047347885990299, "res": {"No": 0.8695192380636136, "Yes": 0.13047347885990299}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.771781885972888, "res": {"Yes": 0.771781885972888, "No": 0.22821141976838882}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994822480798075, "res": {"Yes": 0.9994822480798075, "No": 0.0005171903341739533}, "ground_truth": 1}, {"key": "34665539", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8767595478260393, "res": {"Yes": 0.8767595478260393, "No": 0.12323884219690094}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9741983054874384, "res": {"Yes": 0.9741983054874384, "No": 0.025799986766011182}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.41531078894699563, "res": {"No": 0.5846574843839653, "Yes": 0.41531078894699563}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8168938574532647, "res": {"Yes": 0.8168938574532647, "No": 0.18310436283091316}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9389124796766257, "res": {"Yes": 0.9389124796766257, "No": 0.061085912746592835}, "ground_truth": 1}, {"key": "37872111", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9198720267253824, "res": {"Yes": 0.9198720267253824, "No": 0.08011060793747872}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8489333923404642, "res": {"Yes": 0.8489333923404642, "No": 0.15105662688248875}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9839702166697497, "res": {"Yes": 0.9839702166697497, "No": 0.01602828347705246}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986146516821516, "res": {"Yes": 0.9986146516821516, "No": 0.0013850154280757563}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.942292955174265, "res": {"Yes": 0.942292955174265, "No": 0.05770543591155067}, "ground_truth": 1}, {"key": "36629542", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998059205581825, "res": {"Yes": 0.998059205581825, "No": 0.0019398273591415094}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916752383436409, "res": {"Yes": 0.9916752383436409, "No": 0.008323861576782995}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.997946065316574, "res": {"Yes": 0.997946065316574, "No": 0.0020533903964747853}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989549950983148, "res": {"Yes": 0.9989549950983148, "No": 0.0010442138221541653}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909093510854587, "res": {"Yes": 0.9909093510854587, "No": 0.009089476165600322}, "ground_truth": 1}, {"key": "36487527", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978329384756344, "res": {"Yes": 0.9978329384756344, "No": 0.0021656732411032427}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817993013103146, "res": {"Yes": 0.9817993013103146, "No": 0.018200358268695087}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.43621613324895614, "res": {"No": 0.5637825226412359, "Yes": 0.43621613324895614}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8695040129150516, "res": {"Yes": 0.8695040129150516, "No": 0.13049361247095922}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955964571524535, "res": {"Yes": 0.9955964571524535, "No": 0.004403172390548826}, "ground_truth": 1}, {"key": "37344756", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7264919620279786, "res": {"Yes": 0.7264919620279786, "No": 0.27350234038236115}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5769088137835874, "res": {"Yes": 0.5769088137835874, "No": 0.42308947393440055}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8485054086174539, "res": {"Yes": 0.8485054086174539, "No": 0.15149334233224596}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972291637725311, "res": {"Yes": 0.9972291637725311, "No": 0.002769731208870725}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9519902460395834, "res": {"Yes": 0.9519902460395834, "No": 0.048008029539087865}, "ground_truth": 1}, {"key": "38707722", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9933881763343727, "res": {"Yes": 0.9933881763343727, "No": 0.006611077683875945}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9762216919214997, "res": {"Yes": 0.9762216919214997, "No": 0.02377775973750152}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.915013518041409, "res": {"Yes": 0.915013518041409, "No": 0.08498396392467569}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9798446875195953, "res": {"Yes": 0.9798446875195953, "No": 0.020154231754676098}, "ground_truth": 1}, {"key": "37093419", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9170270268877174, "res": {"Yes": 0.9170270268877174, "No": 0.08297200643018406}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754366003342801, "res": {"Yes": 0.9754366003342801, "No": 0.024558209044573356}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03389442270863885, "res": {"No": 0.9661043343774471, "Yes": 0.03389442270863885}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.261237062097812, "res": {"No": 0.7387616060470931, "Yes": 0.261237062097812}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978508627096941, "res": {"Yes": 0.9978508627096941, "No": 0.0021479486684300494}, "ground_truth": 1}, {"key": "35547391", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965903592719251, "res": {"Yes": 0.9965903592719251, "No": 0.0034076853539372087}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9589908215495753, "res": {"Yes": 0.9589908215495753, "No": 0.041007824049829646}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1770485659759164, "res": {"No": 0.8229508011356808, "Yes": 0.1770485659759164}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.71665205149448, "res": {"Yes": 0.71665205149448, "No": 0.28334611710205526}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32189338202207773, "res": {"No": 0.6781057701756862, "Yes": 0.32189338202207773}, "ground_truth": 1}, {"key": "37173168", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.811651433558017, "res": {"Yes": 0.811651433558017, "No": 0.18834701370505794}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5353491115220236, "res": {"Yes": 0.5353491115220236, "No": 0.46464941685476013}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9997325013541444, "res": {"Yes": 0.9997325013541444, "No": 0.0002672009662775837}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999932220207154, "res": {"Yes": 0.999932220207154, "No": 6.748697106338366e-05}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977315911368512, "res": {"Yes": 0.9977315911368512, "No": 0.002268085943216838}, "ground_truth": 1}, {"key": "30725298", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999380608083109, "res": {"Yes": 0.9999380608083109, "No": 6.174379952799284e-05}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995013036503816, "res": {"Yes": 0.9995013036503816, "No": 0.0004986537454322577}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.018153988324761547, "res": {"No": 0.9818416010808069, "Yes": 0.018153988324761547}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5608064174179112, "res": {"Yes": 0.5608064174179112, "No": 0.4391890591314454}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.047657750849540194, "res": {"No": 0.9523400886571046, "Yes": 0.047657750849540194}, "ground_truth": 1}, {"key": "33830573", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5434140644553133, "res": {"Yes": 0.5434140644553133, "No": 0.45658221344159033}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.535442002650945, "res": {"Yes": 0.535442002650945, "No": 0.46455279840876523}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8811327353317285, "res": {"Yes": 0.8811327353317285, "No": 0.11886320320648644}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986814657311704, "res": {"Yes": 0.9986814657311704, "No": 0.0013179584084008593}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997257085847705, "res": {"Yes": 0.9997257085847705, "No": 0.00027353362509078855}, "ground_truth": 1}, {"key": "33415474", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.971981458811932, "res": {"Yes": 0.971981458811932, "No": 0.028017356103556736}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9839445944189108, "res": {"Yes": 0.9839445944189108, "No": 0.0160544212508114}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9072855765910325, "res": {"Yes": 0.9072855765910325, "No": 0.0927137026668834}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8650212550846124, "res": {"Yes": 0.8650212550846124, "No": 0.1349778888843443}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9893681796154338, "res": {"Yes": 0.9893681796154338, "No": 0.01063142012071108}, "ground_truth": 1}, {"key": "37383994", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940273719814766, "res": {"Yes": 0.9940273719814766, "No": 0.005972032244737661}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9300819708686704, "res": {"Yes": 0.9300819708686704, "No": 0.06991759113212503}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.973597688003412, "res": {"Yes": 0.973597688003412, "No": 0.026400471741323202}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950706767864625, "res": {"Yes": 0.9950706767864625, "No": 0.004928959835018748}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982347473943896, "res": {"Yes": 0.9982347473943896, "No": 0.0017644892627836396}, "ground_truth": 1}, {"key": "38576819", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996792030166253, "res": {"Yes": 0.996792030166253, "No": 0.0032077404479414562}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.994078844250214, "res": {"Yes": 0.994078844250214, "No": 0.005920827851988395}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9831865978269873, "res": {"Yes": 0.9831865978269873, "No": 0.016812443201083525}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934097042828074, "res": {"Yes": 0.9934097042828074, "No": 0.006589666096241729}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982409239909978, "res": {"Yes": 0.9982409239909978, "No": 0.0017587281028821839}, "ground_truth": 1}, {"key": "34500226", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943857066822819, "res": {"Yes": 0.9943857066822819, "No": 0.00561347119195927}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965874003995409, "res": {"Yes": 0.9965874003995409, "No": 0.0034123970070867376}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0013043466694559018, "res": {"No": 0.998693117715409, "Yes": 0.0013043466694559018}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939820230021732, "res": {"Yes": 0.9939820230021732, "No": 0.006017288117368051}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.943855708202516, "res": {"Yes": 0.943855708202516, "No": 0.056141516252312625}, "ground_truth": 1}, {"key": "39856394", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944150578017221, "res": {"Yes": 0.9944150578017221, "No": 0.005584353553899468}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9125733255379533, "res": {"Yes": 0.9125733255379533, "No": 0.08742345224435932}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 6.396522382641743e-05, "res": {"No": 0.9999355576904948, "Yes": 6.396522382641743e-05}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9843998606731884, "res": {"Yes": 0.9843998606731884, "No": 0.015599361422805932}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994970144996403, "res": {"Yes": 0.9994970144996403, "No": 0.0005024150434931083}, "ground_truth": 1}, {"key": "35499522", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988352178859073, "res": {"Yes": 0.9988352178859073, "No": 0.0011643832864304164}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999249618377209, "res": {"Yes": 0.999249618377209, "No": 0.0007499541399775899}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9920190859632896, "res": {"Yes": 0.9920190859632896, "No": 0.007979700496433541}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9820472300601201, "res": {"Yes": 0.9820472300601201, "No": 0.017952271968430616}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9835173108863438, "res": {"Yes": 0.9835173108863438, "No": 0.016482550551522947}, "ground_truth": 1}, {"key": "30157766", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9672365508706177, "res": {"Yes": 0.9672365508706177, "No": 0.032763129384567526}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7976915460720901, "res": {"Yes": 0.7976915460720901, "No": 0.2023075327419861}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.720207423692315, "res": {"Yes": 0.720207423692315, "No": 0.27979012792102215}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9786519250393301, "res": {"Yes": 0.9786519250393301, "No": 0.02134718069037929}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9344937314758769, "res": {"Yes": 0.9344937314758769, "No": 0.06550471855028248}, "ground_truth": 1}, {"key": "40472346", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.948557961387698, "res": {"Yes": 0.948557961387698, "No": 0.05144033945639468}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6596985306869961, "res": {"Yes": 0.6596985306869961, "No": 0.3402995227211715}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.24220916692592698, "res": {"No": 0.757785223345399, "Yes": 0.24220916692592698}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.04096498506368685, "res": {"No": 0.9590310472632131, "Yes": 0.04096498506368685}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.13387688098564335, "res": {"No": 0.8661172820244731, "Yes": 0.13387688098564335}, "ground_truth": 1}, {"key": "35305635", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3883985017751229, "res": {"No": 0.6115962600329289, "Yes": 0.3883985017751229}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.0018790767747854117, "res": {"No": 0.998120130372465, "Yes": 0.0018790767747854117}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3588543439436001, "res": {"No": 0.6411440245592218, "Yes": 0.3588543439436001}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863722175790671, "res": {"Yes": 0.9863722175790671, "No": 0.013627184786295222}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956537698142828, "res": {"Yes": 0.9956537698142828, "No": 0.004345703600512531}, "ground_truth": 1}, {"key": "32495926", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9882651804324709, "res": {"Yes": 0.9882651804324709, "No": 0.011734495988808631}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.992481879915047, "res": {"Yes": 0.992481879915047, "No": 0.007517117692987758}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9773025201055284, "res": {"Yes": 0.9773025201055284, "No": 0.022694787645551294}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9806144117536659, "res": {"Yes": 0.9806144117536659, "No": 0.01938397083617195}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9744357251079172, "res": {"Yes": 0.9744357251079172, "No": 0.025561729179013205}, "ground_truth": 1}, {"key": "37353801", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9900776872129399, "res": {"Yes": 0.9900776872129399, "No": 0.009921670248834398}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.997711293890914, "res": {"Yes": 0.997711293890914, "No": 0.0022881844327360145}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.06492477415445144, "res": {"No": 0.935074269712947, "Yes": 0.06492477415445144}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9509918332624124, "res": {"Yes": 0.9509918332624124, "No": 0.04900664881375224}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99820030210191, "res": {"Yes": 0.99820030210191, "No": 0.0017994136808564794}, "ground_truth": 1}, {"key": "30159904", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9913674770573312, "res": {"Yes": 0.9913674770573312, "No": 0.008631266751603842}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9063139498057227, "res": {"Yes": 0.9063139498057227, "No": 0.09368323760795486}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9910275914613739, "res": {"Yes": 0.9910275914613739, "No": 0.008971781645029639}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988345035192149, "res": {"Yes": 0.9988345035192149, "No": 0.0011650930778566485}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960524182672551, "res": {"Yes": 0.9960524182672551, "No": 0.003945503511509125}, "ground_truth": 1}, {"key": "33698679", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980226467438793, "res": {"Yes": 0.9980226467438793, "No": 0.001977011337719571}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9879129978183406, "res": {"Yes": 0.9879129978183406, "No": 0.012085878186336803}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4074948640251132, "res": {"No": 0.592503594099815, "Yes": 0.4074948640251132}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7553596617379031, "res": {"Yes": 0.7553596617379031, "No": 0.24463981329116202}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49213544778598806, "res": {"No": 0.5078640425496385, "Yes": 0.49213544778598806}, "ground_truth": 1}, {"key": "40530172", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34946411424169577, "res": {"No": 0.6505355514661931, "Yes": 0.34946411424169577}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7735922909929962, "res": {"Yes": 0.7735922909929962, "No": 0.226407116464508}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012973410010272815, "res": {"No": 0.9870260764389853, "Yes": 0.012973410010272815}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968105078713763, "res": {"Yes": 0.9968105078713763, "No": 0.003188571398357611}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9833194697255044, "res": {"Yes": 0.9833194697255044, "No": 0.016678830473622504}, "ground_truth": 1}, {"key": "40652941", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974806714483403, "res": {"Yes": 0.9974806714483403, "No": 0.002519282853259095}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974449661695357, "res": {"Yes": 0.9974449661695357, "No": 0.002553687208739064}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.36626337520009816, "res": {"No": 0.6337353758023706, "Yes": 0.36626337520009816}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.948528246377885, "res": {"Yes": 0.948528246377885, "No": 0.05147052321154968}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4102156425959004, "res": {"No": 0.5897828680928668, "Yes": 0.4102156425959004}, "ground_truth": 1}, {"key": "40122246", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7986435123896801, "res": {"Yes": 0.7986435123896801, "No": 0.20135563620459213}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9332465667544971, "res": {"Yes": 0.9332465667544971, "No": 0.06675260207931011}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8083645875627933, "res": {"Yes": 0.8083645875627933, "No": 0.1916309676384602}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887215549618164, "res": {"Yes": 0.9887215549618164, "No": 0.011276605602076538}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881898513077689, "res": {"Yes": 0.9881898513077689, "No": 0.011809399462276713}, "ground_truth": 1}, {"key": "40032656", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981857336200391, "res": {"Yes": 0.981857336200391, "No": 0.01814002420413011}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925727776520609, "res": {"Yes": 0.9925727776520609, "No": 0.007425093513217164}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5198107894834906, "res": {"Yes": 0.5198107894834906, "No": 0.4801251115407652}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.11138637120961753, "res": {"No": 0.8885616208444533, "Yes": 0.11138637120961753}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0051367843917769634, "res": {"No": 0.9947382726381837, "Yes": 0.0051367843917769634}, "ground_truth": 1}, {"key": "38913680", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.020517339932535794, "res": {"No": 0.979444369503613, "Yes": 0.020517339932535794}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.008792932545509035, "res": {"No": 0.9911848552410342, "Yes": 0.008792932545509035}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5960280197610314, "res": {"Yes": 0.5960280197610314, "No": 0.4039686633478705}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897932183732473, "res": {"Yes": 0.9897932183732473, "No": 0.010203744685875208}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9782433487948338, "res": {"Yes": 0.9782433487948338, "No": 0.021753569934425294}, "ground_truth": 1}, {"key": "17608039", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9733089559095165, "res": {"Yes": 0.9733089559095165, "No": 0.02668958672364833}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9837462482311072, "res": {"Yes": 0.9837462482311072, "No": 0.01625191550374204}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.479950164388207e-05, "res": {"No": 0.9999748931371826, "Yes": 2.479950164388207e-05}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.005845123822131148, "res": {"No": 0.994153426123224, "Yes": 0.005845123822131148}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8366327033114627, "res": {"Yes": 0.8366327033114627, "No": 0.16336595042543628}, "ground_truth": 1}, {"key": "40434901", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8722691451514368, "res": {"Yes": 0.8722691451514368, "No": 0.12772933809558842}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9687495139923716, "res": {"Yes": 0.9687495139923716, "No": 0.03125012196272945}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9897337741674427, "res": {"Yes": 0.9897337741674427, "No": 0.010262426153405044}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891146716628797, "res": {"Yes": 0.9891146716628797, "No": 0.010884611241500324}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974786536467309, "res": {"Yes": 0.9974786536467309, "No": 0.0025204319082047975}, "ground_truth": 1}, {"key": "37680058", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978800620492791, "res": {"Yes": 0.9978800620492791, "No": 0.002119846496989838}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9414027074845039, "res": {"Yes": 0.9414027074845039, "No": 0.05858425526412993}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00017888310681437932, "res": {"No": 0.9998203128156763, "Yes": 0.00017888310681437932}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984816505606336, "res": {"Yes": 0.9984816505606336, "No": 0.001517472738403845}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986971579365891, "res": {"Yes": 0.9986971579365891, "No": 0.0013021720513916506}, "ground_truth": 1}, {"key": "37291821", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990910954042371, "res": {"Yes": 0.9990910954042371, "No": 0.00090853004470812}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894600170319743, "res": {"Yes": 0.9894600170319743, "No": 0.010539849657130685}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9994400947454806, "res": {"Yes": 0.9994400947454806, "No": 0.0005585059421672387}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992816313204188, "res": {"Yes": 0.9992816313204188, "No": 0.0007179912324952545}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999736672396801, "res": {"Yes": 0.999736672396801, "No": 0.00026294728056647226}, "ground_truth": 1}, {"key": "41002743", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999516492769339, "res": {"Yes": 0.9999516492769339, "No": 4.827493867387799e-05}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999918155229466, "res": {"Yes": 0.999918155229466, "No": 8.125631635680059e-05}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8225925663502384, "res": {"Yes": 0.8225925663502384, "No": 0.1774067961656978}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985915915394271, "res": {"Yes": 0.9985915915394271, "No": 0.0014080549429867357}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974524385564895, "res": {"Yes": 0.9974524385564895, "No": 0.0025475471110152953}, "ground_truth": 1}, {"key": "36322869", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9627997362633325, "res": {"Yes": 0.9627997362633325, "No": 0.0371997365394859}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9526959293782569, "res": {"Yes": 0.9526959293782569, "No": 0.04730364145896931}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9764290761245165, "res": {"Yes": 0.9764290761245165, "No": 0.023568764069548796}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917189711942571, "res": {"Yes": 0.9917189711942571, "No": 0.008279698701125623}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9900129481496397, "res": {"Yes": 0.9900129481496397, "No": 0.009986181416348426}, "ground_truth": 1}, {"key": "39459717", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943908858542841, "res": {"Yes": 0.9943908858542841, "No": 0.005608084845474972}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 3.3359335257507944e-06, "res": {"No": 0.9999946806438478, "Yes": 3.3359335257507944e-06}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9493983412714034, "res": {"Yes": 0.9493983412714034, "No": 0.050600177236359484}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925920359344333, "res": {"Yes": 0.9925920359344333, "No": 0.00740735761822977}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942897600514505, "res": {"Yes": 0.9942897600514505, "No": 0.0057096531583786855}, "ground_truth": 1}, {"key": "36503727", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995992010113826, "res": {"Yes": 0.9995992010113826, "No": 0.0004000680101235826}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944566660920594, "res": {"Yes": 0.9944566660920594, "No": 0.005542181960931267}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9965953233008675, "res": {"Yes": 0.9965953233008675, "No": 0.0034046368399589353}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992922255905554, "res": {"Yes": 0.9992922255905554, "No": 0.0007077116650321064}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989954426059453, "res": {"Yes": 0.9989954426059453, "No": 0.0010045234791653736}, "ground_truth": 1}, {"key": "35682367", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995695426233722, "res": {"Yes": 0.9995695426233722, "No": 0.0004303863346367722}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999737029902699, "res": {"Yes": 0.999737029902699, "No": 0.0002628250385044909}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6420278603820773, "res": {"Yes": 0.6420278603820773, "No": 0.3579699266073661}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9899403897896734, "res": {"Yes": 0.9899403897896734, "No": 0.010059163186769998}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968481701771417, "res": {"Yes": 0.9968481701771417, "No": 0.0031514960575109783}, "ground_truth": 1}, {"key": "36472353", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970462818344313, "res": {"Yes": 0.9970462818344313, "No": 0.0029534833754361507}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9725986782496086, "res": {"Yes": 0.9725986782496086, "No": 0.02739940448358591}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9987210576447628, "res": {"Yes": 0.9987210576447628, "No": 0.0012777762712136939}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.988874708915368, "res": {"Yes": 0.988874708915368, "No": 0.011123764647688023}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993362644500238, "res": {"Yes": 0.993362644500238, "No": 0.006636680269020992}, "ground_truth": 1}, {"key": "37651907", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904867302054069, "res": {"Yes": 0.9904867302054069, "No": 0.009512952385250274}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877885217078198, "res": {"Yes": 0.9877885217078198, "No": 0.01221020746015724}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.991610059655631, "res": {"Yes": 0.991610059655631, "No": 0.008389457675368882}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890499461166091, "res": {"Yes": 0.9890499461166091, "No": 0.010949358597565906}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9687617106255304, "res": {"Yes": 0.9687617106255304, "No": 0.031236449901023835}, "ground_truth": 1}, {"key": "36255476", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975720006702428, "res": {"Yes": 0.9975720006702428, "No": 0.0024276562335839437}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9456767029302695, "res": {"Yes": 0.9456767029302695, "No": 0.05432281361701179}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9260629093319245, "res": {"Yes": 0.9260629093319245, "No": 0.07393443441099792}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996386203933458, "res": {"Yes": 0.9996386203933458, "No": 0.00036096754742570385}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995538183186646, "res": {"Yes": 0.9995538183186646, "No": 0.0004453933649884249}, "ground_truth": 1}, {"key": "37283518", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983417866574846, "res": {"Yes": 0.9983417866574846, "No": 0.0016552978774071153}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967300896398407, "res": {"Yes": 0.9967300896398407, "No": 0.00326857064910174}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9882714697720919, "res": {"Yes": 0.9882714697720919, "No": 0.011727321365285391}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985418978653873, "res": {"Yes": 0.9985418978653873, "No": 0.001457930958621614}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996123141894134, "res": {"Yes": 0.996123141894134, "No": 0.0038759805617289626}, "ground_truth": 1}, {"key": "34906785", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982587476419909, "res": {"Yes": 0.9982587476419909, "No": 0.001740790427893971}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926332584428362, "res": {"Yes": 0.9926332584428362, "No": 0.0073664947242388255}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9334245687800998, "res": {"Yes": 0.9334245687800998, "No": 0.06657433074367249}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929797751165732, "res": {"Yes": 0.9929797751165732, "No": 0.007018775544629332}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940329076359222, "res": {"Yes": 0.9940329076359222, "No": 0.005966829289210551}, "ground_truth": 1}, {"key": "34965328", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9589002635273329, "res": {"Yes": 0.9589002635273329, "No": 0.04109806659790228}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9656011193230248, "res": {"Yes": 0.9656011193230248, "No": 0.034397128222213205}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9903503812638913, "res": {"Yes": 0.9903503812638913, "No": 0.00964875873887886}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975361806974692, "res": {"Yes": 0.9975361806974692, "No": 0.0024625078428731394}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957028134420445, "res": {"Yes": 0.9957028134420445, "No": 0.004295943816315254}, "ground_truth": 1}, {"key": "38788440", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986221438166807, "res": {"Yes": 0.9986221438166807, "No": 0.0013761668356254023}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9897221042853114, "res": {"Yes": 0.9897221042853114, "No": 0.0102771900269728}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8942565502648514, "res": {"Yes": 0.8942565502648514, "No": 0.10573876582840937}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917514525059127, "res": {"Yes": 0.9917514525059127, "No": 0.008248194096425913}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905345748414464, "res": {"Yes": 0.9905345748414464, "No": 0.00946425697291316}, "ground_truth": 1}, {"key": "35046866", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965396910160191, "res": {"Yes": 0.9965396910160191, "No": 0.0034584408944759505}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9803473954298793, "res": {"Yes": 0.9803473954298793, "No": 0.019650627644127568}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9997090321193669, "res": {"Yes": 0.9997090321193669, "No": 0.0002896523926410103}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9824341307301625, "res": {"Yes": 0.9824341307301625, "No": 0.017561529380608433}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981343824319335, "res": {"Yes": 0.9981343824319335, "No": 0.0018644376490775598}, "ground_truth": 1}, {"key": "37629558", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998002940738838, "res": {"Yes": 0.9998002940738838, "No": 0.00019887620743471026}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987715971052815, "res": {"Yes": 0.9987715971052815, "No": 0.0012275708267011176}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9652634555222117, "res": {"Yes": 0.9652634555222117, "No": 0.03473551946015757}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9600581763421272, "res": {"Yes": 0.9600581763421272, "No": 0.03993986624094303}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8565966710882906, "res": {"Yes": 0.8565966710882906, "No": 0.14339979218725346}, "ground_truth": 1}, {"key": "33859914", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987067545236643, "res": {"Yes": 0.987067545236643, "No": 0.012931216591776283}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.964500996942778, "res": {"Yes": 0.964500996942778, "No": 0.03549471382238917}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9798667803268272, "res": {"Yes": 0.9798667803268272, "No": 0.020132327954320642}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939221980186117, "res": {"Yes": 0.9939221980186117, "No": 0.006077248103626825}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991517808913819, "res": {"Yes": 0.9991517808913819, "No": 0.0008477348058777537}, "ground_truth": 1}, {"key": "39790523", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997225248159907, "res": {"Yes": 0.997225248159907, "No": 0.0027744892577070304}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914976581018627, "res": {"Yes": 0.9914976581018627, "No": 0.008501936652221171}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9970959282980059, "res": {"Yes": 0.9970959282980059, "No": 0.0029038495244835837}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8888521693419057, "res": {"Yes": 0.8888521693419057, "No": 0.11114727003079632}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997441765883909, "res": {"Yes": 0.9997441765883909, "No": 0.00025545200840368387}, "ground_truth": 1}, {"key": "33509656", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999088581436714, "res": {"Yes": 0.9999088581436714, "No": 9.103240645055508e-05}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998073257140748, "res": {"Yes": 0.9998073257140748, "No": 0.00019263160027848574}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8430663053838349, "res": {"Yes": 0.8430663053838349, "No": 0.15693149601007217}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999787780261795, "res": {"Yes": 0.999787780261795, "No": 0.0002118750169138885}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999222078259681, "res": {"Yes": 0.9999222078259681, "No": 7.762469087540323e-05}, "ground_truth": 1}, {"key": "17380923", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994259248442339, "res": {"Yes": 0.9994259248442339, "No": 0.0005736454703163585}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996607808478312, "res": {"Yes": 0.9996607808478312, "No": 0.00033860351748488007}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.935976868454086, "res": {"Yes": 0.935976868454086, "No": 0.06402142410491485}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941945381999024, "res": {"Yes": 0.9941945381999024, "No": 0.005805212837448093}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834543510739234, "res": {"Yes": 0.9834543510739234, "No": 0.0165453695798614}, "ground_truth": 1}, {"key": "36202526", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9629134602188751, "res": {"Yes": 0.9629134602188751, "No": 0.03708609416432716}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9775923801898506, "res": {"Yes": 0.9775923801898506, "No": 0.022407187523603052}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9323138090310549, "res": {"Yes": 0.9323138090310549, "No": 0.06768489098286266}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992012922369882, "res": {"Yes": 0.9992012922369882, "No": 0.0007986030054051084}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972576168217682, "res": {"Yes": 0.9972576168217682, "No": 0.002742181246272731}, "ground_truth": 1}, {"key": "26419232", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932151497409001, "res": {"Yes": 0.9932151497409001, "No": 0.006784744113267968}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978746011655819, "res": {"Yes": 0.9978746011655819, "No": 0.0021251062029410427}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9934867649695356, "res": {"Yes": 0.9934867649695356, "No": 0.006513102251111241}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9623670829429186, "res": {"Yes": 0.9623670829429186, "No": 0.0376327988118773}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983737522762163, "res": {"Yes": 0.9983737522762163, "No": 0.0016261971934910734}, "ground_truth": 1}, {"key": "34232398", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7255874618718147, "res": {"Yes": 0.7255874618718147, "No": 0.2744119869127947}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962982448510466, "res": {"Yes": 0.9962982448510466, "No": 0.003701662262810727}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.007410356723571598, "res": {"No": 0.9925889854060168, "Yes": 0.007410356723571598}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9235007160293925, "res": {"Yes": 0.9235007160293925, "No": 0.07649897017537967}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987136309718189, "res": {"Yes": 0.987136309718189, "No": 0.012863626519063401}, "ground_truth": 1}, {"key": "33586045", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9656929465619063, "res": {"Yes": 0.9656929465619063, "No": 0.034306700295336987}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.928268867731584, "res": {"Yes": 0.928268867731584, "No": 0.07173075380332614}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.38836608738883593, "res": {"No": 0.6116315868531371, "Yes": 0.38836608738883593}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928922180204526, "res": {"Yes": 0.9928922180204526, "No": 0.007106765935785809}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966175882863741, "res": {"Yes": 0.9966175882863741, "No": 0.0033816980972822945}, "ground_truth": 1}, {"key": "32281151", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9817965454035437, "res": {"Yes": 0.9817965454035437, "No": 0.018194340405324177}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898722891830753, "res": {"Yes": 0.9898722891830753, "No": 0.010126069924670366}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0068102142379388945, "res": {"No": 0.9931857619326145, "Yes": 0.0068102142379388945}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.011937164240751953, "res": {"No": 0.9880535631393336, "Yes": 0.011937164240751953}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0225329724559064, "res": {"No": 0.9774449830806822, "Yes": 0.0225329724559064}, "ground_truth": 1}, {"key": "37308159", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0477921203169121, "res": {"No": 0.9521804288934513, "Yes": 0.0477921203169121}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 1.7600662756673243e-05, "res": {"No": 0.9999800187796273, "Yes": 1.7600662756673243e-05}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.946466055647645, "res": {"Yes": 0.946466055647645, "No": 0.05353322445400945}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879548793017917, "res": {"Yes": 0.9879548793017917, "No": 0.012044417552997653}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99291948480378, "res": {"Yes": 0.99291948480378, "No": 0.007079790231745282}, "ground_truth": 1}, {"key": "35694408", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948425638790833, "res": {"Yes": 0.9948425638790833, "No": 0.005155817449406911}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961341411465935, "res": {"Yes": 0.9961341411465935, "No": 0.003864360770050488}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9866037620930265, "res": {"Yes": 0.9866037620930265, "No": 0.013395331878437935}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.996291371213907, "res": {"Yes": 0.996291371213907, "No": 0.003707898518169503}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977919982300636, "res": {"Yes": 0.9977919982300636, "No": 0.0022073616503462405}, "ground_truth": 1}, {"key": "39781995", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971184492601028, "res": {"Yes": 0.9971184492601028, "No": 0.002881250147575657}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974705863712467, "res": {"Yes": 0.9974705863712467, "No": 0.002529156578510246}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8481325639457952, "res": {"Yes": 0.8481325639457952, "No": 0.15186641397247067}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9888058144366844, "res": {"Yes": 0.9888058144366844, "No": 0.011192880191608757}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970333671775847, "res": {"Yes": 0.9970333671775847, "No": 0.002965525582546176}, "ground_truth": 1}, {"key": "22799372", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986680282628473, "res": {"Yes": 0.9986680282628473, "No": 0.0013310622118204208}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872067118287929, "res": {"Yes": 0.9872067118287929, "No": 0.0127924081154487}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9964201280752717, "res": {"Yes": 0.9964201280752717, "No": 0.003579518445669327}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9783127106636685, "res": {"Yes": 0.9783127106636685, "No": 0.02168625563460324}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991924818783884, "res": {"Yes": 0.9991924818783884, "No": 0.0008070733513167094}, "ground_truth": 1}, {"key": "37428240", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989934218400078, "res": {"Yes": 0.9989934218400078, "No": 0.001006470120361699}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9844615539611651, "res": {"Yes": 0.9844615539611651, "No": 0.015537978906463136}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9869293545968973, "res": {"Yes": 0.9869293545968973, "No": 0.013068591800959446}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9886938138016098, "res": {"Yes": 0.9886938138016098, "No": 0.011305273281919211}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949489927553777, "res": {"Yes": 0.9949489927553777, "No": 0.005050138552159717}, "ground_truth": 1}, {"key": "40612657", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891196874760974, "res": {"Yes": 0.9891196874760974, "No": 0.010879972120626942}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9339410671629054, "res": {"Yes": 0.9339410671629054, "No": 0.06605549215463939}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.169701948032548e-06, "res": {"No": 0.9999955150656573, "Yes": 4.169701948032548e-06}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9080524297794493, "res": {"Yes": 0.9080524297794493, "No": 0.09194570860347462}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8513516466108098, "res": {"Yes": 0.8513516466108098, "No": 0.1486464516981782}, "ground_truth": 1}, {"key": "34404662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8337678355642494, "res": {"Yes": 0.8337678355642494, "No": 0.16622737482261088}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5235264628983806, "res": {"Yes": 0.5235264628983806, "No": 0.4764708466624446}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.41636112764383326, "res": {"No": 0.5836372128842119, "Yes": 0.41636112764383326}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6103367652325062, "res": {"Yes": 0.6103367652325062, "No": 0.3896563430441869}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9629675158460972, "res": {"Yes": 0.9629675158460972, "No": 0.03703119920103388}, "ground_truth": 1}, {"key": "32619704", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7282130299745273, "res": {"Yes": 0.7282130299745273, "No": 0.27178550726705203}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.681142284501029, "res": {"Yes": 0.681142284501029, "No": 0.3188530629582318}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9937721814566874, "res": {"Yes": 0.9937721814566874, "No": 0.0062271220696492454}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961048174809756, "res": {"Yes": 0.9961048174809756, "No": 0.003894850433545183}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994453601877455, "res": {"Yes": 0.994453601877455, "No": 0.00554622756696801}, "ground_truth": 1}, {"key": "39014883", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992394974287799, "res": {"Yes": 0.9992394974287799, "No": 0.0007601745347977439}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973445197419313, "res": {"Yes": 0.9973445197419313, "No": 0.0026550454653745444}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8034313629837131, "res": {"Yes": 0.8034313629837131, "No": 0.1965651402624072}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9149687017768545, "res": {"Yes": 0.9149687017768545, "No": 0.08502843066546678}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.902478961864742, "res": {"Yes": 0.902478961864742, "No": 0.09751421663176195}, "ground_truth": 1}, {"key": "37982812", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9780421438544815, "res": {"Yes": 0.9780421438544815, "No": 0.02195456913666033}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9084428309348711, "res": {"Yes": 0.9084428309348711, "No": 0.09155499033073596}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.22767666329222103, "res": {"No": 0.7723208550127404, "Yes": 0.22767666329222103}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959890240384269, "res": {"Yes": 0.9959890240384269, "No": 0.004010591669676982}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765658258038235, "res": {"Yes": 0.9765658258038235, "No": 0.023433187316789603}, "ground_truth": 1}, {"key": "28123476", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941123087878319, "res": {"Yes": 0.9941123087878319, "No": 0.005887173957156418}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954362536833036, "res": {"Yes": 0.9954362536833036, "No": 0.004563157321431217}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8254255561528098, "res": {"Yes": 0.8254255561528098, "No": 0.1745732769969032}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9864604909921368, "res": {"Yes": 0.9864604909921368, "No": 0.013538402529926434}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6754830597409807, "res": {"Yes": 0.6754830597409807, "No": 0.3245159796943681}, "ground_truth": 1}, {"key": "39078849", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929049098456357, "res": {"Yes": 0.9929049098456357, "No": 0.007094176803345427}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9484203602714212, "res": {"Yes": 0.9484203602714212, "No": 0.05157669272055758}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9108039845697726, "res": {"Yes": 0.9108039845697726, "No": 0.08918284308434889}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.47526237453696374, "res": {"No": 0.5247326679413999, "Yes": 0.47526237453696374}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9138530749109168, "res": {"Yes": 0.9138530749109168, "No": 0.08614539247732174}, "ground_truth": 1}, {"key": "39414137", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6574763402423195, "res": {"Yes": 0.6574763402423195, "No": 0.34251935908544606}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8012013501757453, "res": {"Yes": 0.8012013501757453, "No": 0.1987891626656237}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2779966821708357, "res": {"No": 0.7220015668061824, "Yes": 0.2779966821708357}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9183088656440271, "res": {"Yes": 0.9183088656440271, "No": 0.08168787475024643}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9846743141680488, "res": {"Yes": 0.9846743141680488, "No": 0.015324808395172685}, "ground_truth": 1}, {"key": "37371354", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9748021633088629, "res": {"Yes": 0.9748021633088629, "No": 0.025196605397169943}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9831037491664109, "res": {"Yes": 0.9831037491664109, "No": 0.016895431821712756}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.350846678820503e-05, "res": {"No": 0.9999558212119114, "Yes": 4.350846678820503e-05}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983979901898495, "res": {"Yes": 0.9983979901898495, "No": 0.0016016654257406465}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998509394606263, "res": {"Yes": 0.9998509394606263, "No": 0.0001489617494613878}, "ground_truth": 1}, {"key": "29497179", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967829136477138, "res": {"Yes": 0.9967829136477138, "No": 0.0032168521202421098}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9806184224748119, "res": {"Yes": 0.9806184224748119, "No": 0.019381391001345477}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9938925169631972, "res": {"Yes": 0.9938925169631972, "No": 0.0061062056820558154}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986983484841807, "res": {"Yes": 0.9986983484841807, "No": 0.0013013540235258304}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964412046763692, "res": {"Yes": 0.9964412046763692, "No": 0.0035579480170693043}, "ground_truth": 1}, {"key": "35908694", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990945402014341, "res": {"Yes": 0.990945402014341, "No": 0.009053715980188502}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877909684630184, "res": {"Yes": 0.9877909684630184, "No": 0.01220735060587655}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9094546089190123, "res": {"Yes": 0.9094546089190123, "No": 0.09054318135754665}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9071779426151994, "res": {"Yes": 0.9071779426151994, "No": 0.09282124798898624}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8393758072312162, "res": {"Yes": 0.8393758072312162, "No": 0.1606235741916356}, "ground_truth": 1}, {"key": "37619358", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8546573676103338, "res": {"Yes": 0.8546573676103338, "No": 0.14533989154640914}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4260988147943873, "res": {"No": 0.5738972302894255, "Yes": 0.4260988147943873}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9341450154601655, "res": {"Yes": 0.9341450154601655, "No": 0.06585427056318217}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9597831264400833, "res": {"Yes": 0.9597831264400833, "No": 0.04021580103552654}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9494435906746332, "res": {"Yes": 0.9494435906746332, "No": 0.05055570405601474}, "ground_truth": 1}, {"key": "37293103", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9863318570400581, "res": {"Yes": 0.9863318570400581, "No": 0.01366785709810426}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860373852729225, "res": {"Yes": 0.9860373852729225, "No": 0.013961630367001456}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.021748134396497434, "res": {"No": 0.9782452876750725, "Yes": 0.021748134396497434}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918772780433801, "res": {"Yes": 0.9918772780433801, "No": 0.008120480107638424}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884181757239319, "res": {"Yes": 0.9884181757239319, "No": 0.011577782721479406}, "ground_truth": 1}, {"key": "36883729", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9933026564461613, "res": {"Yes": 0.9933026564461613, "No": 0.0066964710520111435}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.904496512387493, "res": {"Yes": 0.904496512387493, "No": 0.09549484614688986}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.976408952128636, "res": {"Yes": 0.976408952128636, "No": 0.02359022140645965}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889790545462702, "res": {"Yes": 0.9889790545462702, "No": 0.01101980687902841}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916338595742825, "res": {"Yes": 0.9916338595742825, "No": 0.008365498104934687}, "ground_truth": 1}, {"key": "39209521", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9143556860508179, "res": {"Yes": 0.9143556860508179, "No": 0.08564411212564409}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995905899474664, "res": {"Yes": 0.995905899474664, "No": 0.004091568034014693}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9817223273354678, "res": {"Yes": 0.9817223273354678, "No": 0.018275736010224232}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9777345852538157, "res": {"Yes": 0.9777345852538157, "No": 0.02226057727906457}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7662707286697993, "res": {"Yes": 0.7662707286697993, "No": 0.2337252263405116}, "ground_truth": 1}, {"key": "27792571", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9651671695035765, "res": {"Yes": 0.9651671695035765, "No": 0.03482996498208284}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8674230797226941, "res": {"Yes": 0.8674230797226941, "No": 0.13257581612876895}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9853117927850704, "res": {"Yes": 0.9853117927850704, "No": 0.014687473372728722}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988586482379538, "res": {"Yes": 0.9988586482379538, "No": 0.001140673958036903}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989390534022285, "res": {"Yes": 0.9989390534022285, "No": 0.0010603216218925391}, "ground_truth": 1}, {"key": "39755647", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994517628669264, "res": {"Yes": 0.9994517628669264, "No": 0.0005475909452624315}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987268874962079, "res": {"Yes": 0.9987268874962079, "No": 0.0012725448049279119}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09168076333229783, "res": {"No": 0.9083176016384521, "Yes": 0.09168076333229783}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.996458599901603, "res": {"Yes": 0.996458599901603, "No": 0.0035410315357636683}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875540761973457, "res": {"Yes": 0.9875540761973457, "No": 0.012445464744335787}, "ground_truth": 1}, {"key": "40800537", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957882736019884, "res": {"Yes": 0.9957882736019884, "No": 0.004211381659392278}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9919028400420903, "res": {"Yes": 0.9919028400420903, "No": 0.008096192415360678}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.035073844402787796, "res": {"No": 0.9649245878524223, "Yes": 0.035073844402787796}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6774553042440504, "res": {"Yes": 0.6774553042440504, "No": 0.32254413480538}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999254613878358, "res": {"Yes": 0.999254613878358, "No": 0.0007453089705418945}, "ground_truth": 1}, {"key": "14171461", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989910401426298, "res": {"Yes": 0.9989910401426298, "No": 0.0010085123043499127}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.799289412822203, "res": {"Yes": 0.799289412822203, "No": 0.200709879335339}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.49519705733808217, "res": {"No": 0.5047886763297996, "Yes": 0.49519705733808217}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.991119974403983, "res": {"Yes": 0.991119974403983, "No": 0.008878911573875081}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9391970508305104, "res": {"Yes": 0.9391970508305104, "No": 0.060800297313335895}, "ground_truth": 1}, {"key": "36892440", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8946221129812331, "res": {"Yes": 0.8946221129812331, "No": 0.10537158197074606}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8074086558292033, "res": {"Yes": 0.8074086558292033, "No": 0.19258204659127318}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7595943486304502, "res": {"Yes": 0.7595943486304502, "No": 0.2404023021586118}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9454065238714946, "res": {"Yes": 0.9454065238714946, "No": 0.0545920669905898}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9769868808728316, "res": {"Yes": 0.9769868808728316, "No": 0.023012668738001556}, "ground_truth": 1}, {"key": "33733410", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8496679864329452, "res": {"Yes": 0.8496679864329452, "No": 0.1503304361594348}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9192395838541084, "res": {"Yes": 0.9192395838541084, "No": 0.08075915085647266}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9468873172994074, "res": {"Yes": 0.9468873172994074, "No": 0.05311229676759619}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934415888749869, "res": {"Yes": 0.9934415888749869, "No": 0.0065581810069683215}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980409351774322, "res": {"Yes": 0.9980409351774322, "No": 0.0019590374956574906}, "ground_truth": 1}, {"key": "38587765", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989503547633491, "res": {"Yes": 0.9989503547633491, "No": 0.0010495728574582202}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996822231417649, "res": {"Yes": 0.9996822231417649, "No": 0.0003175161102417202}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7469974705497244, "res": {"Yes": 0.7469974705497244, "No": 0.25299425449965773}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960182363268962, "res": {"Yes": 0.9960182363268962, "No": 0.003970900832792197}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8260495659980177, "res": {"Yes": 0.8260495659980177, "No": 0.1739464823115477}, "ground_truth": 1}, {"key": "41065582", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9880575163495481, "res": {"Yes": 0.9880575163495481, "No": 0.011939760856346023}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9935586674007018, "res": {"Yes": 0.9935586674007018, "No": 0.0064398502000384975}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.1188803132828739e-05, "res": {"No": 0.9999883629027115, "Yes": 1.1188803132828739e-05}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9535074506224853, "res": {"Yes": 0.9535074506224853, "No": 0.046491313048612946}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.98198962976913, "res": {"Yes": 0.98198962976913, "No": 0.01800924575328896}, "ground_truth": 1}, {"key": "34713891", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953689266429357, "res": {"Yes": 0.9953689266429357, "No": 0.004630726554393549}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9788734758057264, "res": {"Yes": 0.9788734758057264, "No": 0.021121941442670367}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8778232371904043, "res": {"Yes": 0.8778232371904043, "No": 0.12217571299411208}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9070528695432073, "res": {"Yes": 0.9070528695432073, "No": 0.09294663200054532}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9744100354231034, "res": {"Yes": 0.9744100354231034, "No": 0.025589056483653543}, "ground_truth": 1}, {"key": "18913023", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9541229036594424, "res": {"Yes": 0.9541229036594424, "No": 0.045875090493453854}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4516145763959519, "res": {"No": 0.5483832034748466, "Yes": 0.4516145763959519}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9710270278609329, "res": {"Yes": 0.9710270278609329, "No": 0.028971792348252656}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964870092416122, "res": {"Yes": 0.9964870092416122, "No": 0.0035120744664604055}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9865371616808811, "res": {"Yes": 0.9865371616808811, "No": 0.01346096854598971}, "ground_truth": 1}, {"key": "36884100", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991515427435833, "res": {"Yes": 0.9991515427435833, "No": 0.0008477413334608896}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9734966776155501, "res": {"Yes": 0.9734966776155501, "No": 0.026501146043514667}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8692780234150623, "res": {"Yes": 0.8692780234150623, "No": 0.13071969252998608}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950945243390169, "res": {"Yes": 0.9950945243390169, "No": 0.004905162779858386}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979552070433767, "res": {"Yes": 0.9979552070433767, "No": 0.0020446779163681202}, "ground_truth": 1}, {"key": "39899913", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9597634606852989, "res": {"Yes": 0.9597634606852989, "No": 0.040235089136075625}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9670663883071943, "res": {"Yes": 0.9670663883071943, "No": 0.03293152082793357}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9968031662893405, "res": {"Yes": 0.9968031662893405, "No": 0.0031960263705544272}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979942687725882, "res": {"Yes": 0.9979942687725882, "No": 0.002005423537266147}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999293595151566, "res": {"Yes": 0.9999293595151566, "No": 7.026491212745861e-05}, "ground_truth": 1}, {"key": "30725366", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986173859926728, "res": {"Yes": 0.9986173859926728, "No": 0.0013820998581333755}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996405269659191, "res": {"Yes": 0.9996405269659191, "No": 0.00035876499279522545}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0906440935999001, "res": {"No": 0.9093540597194865, "Yes": 0.0906440935999001}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998315160150211, "res": {"Yes": 0.9998315160150211, "No": 0.00016792409435123737}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981634857598475, "res": {"Yes": 0.9981634857598475, "No": 0.0018357044035332342}, "ground_truth": 1}, {"key": "26133523", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982368892091613, "res": {"Yes": 0.9982368892091613, "No": 0.0017628714389112485}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987841952901528, "res": {"Yes": 0.9987841952901528, "No": 0.0012151517656381276}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9830008747081652, "res": {"Yes": 0.9830008747081652, "No": 0.016998587952387387}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909223469468185, "res": {"Yes": 0.9909223469468185, "No": 0.009077540437409487}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8689975178933962, "res": {"Yes": 0.8689975178933962, "No": 0.1310014614154607}, "ground_truth": 1}, {"key": "29332665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9847755640785628, "res": {"Yes": 0.9847755640785628, "No": 0.015224297616706755}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923616502016851, "res": {"Yes": 0.9923616502016851, "No": 0.007637919726407603}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.10507227839215486, "res": {"No": 0.8949270521560211, "Yes": 0.10507227839215486}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969877427781842, "res": {"Yes": 0.9969877427781842, "No": 0.003012283048060674}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920973524123354, "res": {"Yes": 0.9920973524123354, "No": 0.007902593359694718}, "ground_truth": 1}, {"key": "37400481", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9835048537349753, "res": {"Yes": 0.9835048537349753, "No": 0.016494925348773688}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.939830076255406, "res": {"Yes": 0.939830076255406, "No": 0.06016962633041038}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9959216306297224, "res": {"Yes": 0.9959216306297224, "No": 0.0040777508696623836}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992243811878948, "res": {"Yes": 0.9992243811878948, "No": 0.0007742041186161618}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999441524405581, "res": {"Yes": 0.999441524405581, "No": 0.0005577743879576214}, "ground_truth": 1}, {"key": "38787241", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990461187342505, "res": {"Yes": 0.9990461187342505, "No": 0.0009535243459536537}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991217968463071, "res": {"Yes": 0.9991217968463071, "No": 0.000877465692723449}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9112539599906134, "res": {"Yes": 0.9112539599906134, "No": 0.0887445465356695}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.44651753826862056, "res": {"No": 0.553480786595793, "Yes": 0.44651753826862056}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8641712100750996, "res": {"Yes": 0.8641712100750996, "No": 0.13582768753400196}, "ground_truth": 1}, {"key": "38225963", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937269707203562, "res": {"Yes": 0.9937269707203562, "No": 0.006272802841877388}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7697077974405995, "res": {"Yes": 0.7697077974405995, "No": 0.23029064359747758}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.12017527222097221, "res": {"No": 0.8797953459297119, "Yes": 0.12017527222097221}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8716375109077658, "res": {"Yes": 0.8716375109077658, "No": 0.12835215266125294}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9284643724573872, "res": {"Yes": 0.9284643724573872, "No": 0.07152423039842795}, "ground_truth": 1}, {"key": "26072034", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6611864541576762, "res": {"Yes": 0.6611864541576762, "No": 0.33880681543854585}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.44367003377897185, "res": {"No": 0.5563234551599184, "Yes": 0.44367003377897185}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2609324545448309, "res": {"No": 0.7390664671959979, "Yes": 0.2609324545448309}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9672019824540309, "res": {"Yes": 0.9672019824540309, "No": 0.03279759465401967}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765488835102779, "res": {"Yes": 0.9765488835102779, "No": 0.0234505741589358}, "ground_truth": 1}, {"key": "35690810", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9343326143505233, "res": {"Yes": 0.9343326143505233, "No": 0.06566628835632804}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9804963528857772, "res": {"Yes": 0.9804963528857772, "No": 0.019503200474155682}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2360049735061459, "res": {"No": 0.7639928335727423, "Yes": 0.2360049735061459}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.10188104212269519, "res": {"No": 0.8981171343893768, "Yes": 0.10188104212269519}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8725358996686358, "res": {"Yes": 0.8725358996686358, "No": 0.12746314691102176}, "ground_truth": 1}, {"key": "36855665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7774122290366493, "res": {"Yes": 0.7774122290366493, "No": 0.22258623042121467}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7046275102876844, "res": {"Yes": 0.7046275102876844, "No": 0.2953718228085352}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5871049410027248, "res": {"Yes": 0.5871049410027248, "No": 0.41289244238476636}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954745249611967, "res": {"Yes": 0.9954745249611967, "No": 0.0045249217264588665}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970988924691875, "res": {"Yes": 0.9970988924691875, "No": 0.00290088997830453}, "ground_truth": 1}, {"key": "29757662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921187045613237, "res": {"Yes": 0.9921187045613237, "No": 0.00787919372113721}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917032614984633, "res": {"Yes": 0.9917032614984633, "No": 0.008295945313403785}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012839995900449585, "res": {"No": 0.9871588482717116, "Yes": 0.012839995900449585}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961650201897524, "res": {"Yes": 0.9961650201897524, "No": 0.003834508553856307}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922431016231201, "res": {"Yes": 0.9922431016231201, "No": 0.007756500600817645}, "ground_truth": 1}, {"key": "19134339", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987175224385677, "res": {"Yes": 0.987175224385677, "No": 0.012824447099560448}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9700023546630252, "res": {"Yes": 0.9700023546630252, "No": 0.029997375034446886}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9892272318982888, "res": {"Yes": 0.9892272318982888, "No": 0.010771219101747482}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918790371392926, "res": {"Yes": 0.9918790371392926, "No": 0.008119535750717324}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912548988023949, "res": {"Yes": 0.9912548988023949, "No": 0.00874358491562797}, "ground_truth": 1}, {"key": "35360732", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9754672226961358, "res": {"Yes": 0.9754672226961358, "No": 0.02453229417033351}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9928382727723742, "res": {"Yes": 0.9928382727723742, "No": 0.007160885016440619}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.029351136826996787, "res": {"No": 0.9706460230714038, "Yes": 0.029351136826996787}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9696701993191104, "res": {"Yes": 0.9696701993191104, "No": 0.030327739203616363}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.976229073161617, "res": {"Yes": 0.976229073161617, "No": 0.023767599294431063}, "ground_truth": 1}, {"key": "37713629", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9845506340415018, "res": {"Yes": 0.9845506340415018, "No": 0.015446651971425793}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6727852593310649, "res": {"Yes": 0.6727852593310649, "No": 0.32721213092063095}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00015737249234374173, "res": {"No": 0.9998421234138201, "Yes": 0.00015737249234374173}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9804444448219333, "res": {"Yes": 0.9804444448219333, "No": 0.01955497987350883}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965922565824571, "res": {"Yes": 0.9965922565824571, "No": 0.003406963001214267}, "ground_truth": 1}, {"key": "33393394", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9429793356532213, "res": {"Yes": 0.9429793356532213, "No": 0.05701898479631142}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938789725864333, "res": {"Yes": 0.9938789725864333, "No": 0.006120045792813681}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6514454407344652, "res": {"Yes": 0.6514454407344652, "No": 0.3485539689787806}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991066860426969, "res": {"Yes": 0.9991066860426969, "No": 0.0008927339051925274}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770589216564237, "res": {"Yes": 0.9770589216564237, "No": 0.022940123125754556}, "ground_truth": 1}, {"key": "32275837", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958710177842822, "res": {"Yes": 0.9958710177842822, "No": 0.004128551747384189}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932649041135586, "res": {"Yes": 0.9932649041135586, "No": 0.006734491318732734}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9968476949797322, "res": {"Yes": 0.9968476949797322, "No": 0.0031516504846012283}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8714549481197371, "res": {"Yes": 0.8714549481197371, "No": 0.1285442597626545}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832366079019446, "res": {"Yes": 0.9832366079019446, "No": 0.01676308720693486}, "ground_truth": 1}, {"key": "21458094", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947928906403362, "res": {"Yes": 0.9947928906403362, "No": 0.0052061326110541825}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821405859427565, "res": {"Yes": 0.9821405859427565, "No": 0.017858149862948872}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9956681874830838, "res": {"Yes": 0.9956681874830838, "No": 0.004331335738055216}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929118413390057, "res": {"Yes": 0.9929118413390057, "No": 0.007087004671426067}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9541794476122452, "res": {"Yes": 0.9541794476122452, "No": 0.04582009172975182}, "ground_truth": 1}, {"key": "40975362", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9790121626346933, "res": {"Yes": 0.9790121626346933, "No": 0.020986460233197817}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4049696309482905, "res": {"No": 0.5950278665472598, "Yes": 0.4049696309482905}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7325141506382762, "res": {"Yes": 0.7325141506382762, "No": 0.26748079757207927}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.3795375655837091, "res": {"No": 0.6204345495985035, "Yes": 0.3795375655837091}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9885841776916499, "res": {"Yes": 0.9885841776916499, "No": 0.011413173734695352}, "ground_truth": 1}, {"key": "35234201", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9028616489349844, "res": {"Yes": 0.9028616489349844, "No": 0.09713371933119898}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9405564187518669, "res": {"Yes": 0.9405564187518669, "No": 0.0594406287936832}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9904470937005296, "res": {"Yes": 0.9904470937005296, "No": 0.009550860727660444}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994135385357894, "res": {"Yes": 0.9994135385357894, "No": 0.0005858701382661994}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974188782378688, "res": {"Yes": 0.9974188782378688, "No": 0.0025802440280653285}, "ground_truth": 1}, {"key": "36037573", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998982239070335, "res": {"Yes": 0.998982239070335, "No": 0.001016909655681349}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979070093658945, "res": {"Yes": 0.9979070093658945, "No": 0.0020912778256533707}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9943202793491533, "res": {"Yes": 0.9943202793491533, "No": 0.005679352779572054}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979290946894702, "res": {"Yes": 0.9979290946894702, "No": 0.002070786690879732}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988450893232613, "res": {"Yes": 0.9988450893232613, "No": 0.0011546505902969165}, "ground_truth": 1}, {"key": "30861915", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962416752479522, "res": {"Yes": 0.9962416752479522, "No": 0.00375829342854467}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.991559418927214, "res": {"Yes": 0.991559418927214, "No": 0.008440163487542248}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9993142482352896, "res": {"Yes": 0.9993142482352896, "No": 0.0006849878891029659}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.995855065054222, "res": {"Yes": 0.995855065054222, "No": 0.004141608665795919}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993703232296777, "res": {"Yes": 0.9993703232296777, "No": 0.0006286348465272928}, "ground_truth": 1}, {"key": "40173012", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9917751258956248, "res": {"Yes": 0.9917751258956248, "No": 0.008223757973456354}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977689716515892, "res": {"Yes": 0.9977689716515892, "No": 0.002230035665812755}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8813583518145618, "res": {"Yes": 0.8813583518145618, "No": 0.11864038927656656}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973685955302888, "res": {"Yes": 0.9973685955302888, "No": 0.002631342780914733}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993866366842327, "res": {"Yes": 0.9993866366842327, "No": 0.0006129495043830942}, "ground_truth": 1}, {"key": "35100330", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998238432484585, "res": {"Yes": 0.998238432484585, "No": 0.0017614217210016874}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997330972248848, "res": {"Yes": 0.9997330972248848, "No": 0.0002664643751578247}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.723027898811558, "res": {"Yes": 0.723027898811558, "No": 0.2769692471713911}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896012547184542, "res": {"Yes": 0.9896012547184542, "No": 0.01039789808991574}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9577100611266837, "res": {"Yes": 0.9577100611266837, "No": 0.042287709106931685}, "ground_truth": 1}, {"key": "37220221", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9728005785926562, "res": {"Yes": 0.9728005785926562, "No": 0.02719877074227899}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9603538391328698, "res": {"Yes": 0.9603538391328698, "No": 0.039645041320331025}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8086295805646937, "res": {"Yes": 0.8086295805646937, "No": 0.19136792404101344}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9062148679180005, "res": {"Yes": 0.9062148679180005, "No": 0.0937842075641611}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5498458020350979, "res": {"Yes": 0.5498458020350979, "No": 0.4501478475803532}, "ground_truth": 1}, {"key": "38815218", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6784814770041869, "res": {"Yes": 0.6784814770041869, "No": 0.3215168905440559}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5072901759310604, "res": {"Yes": 0.5072901759310604, "No": 0.49270202472141483}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9757126420982355, "res": {"Yes": 0.9757126420982355, "No": 0.024286564081699234}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957662828565769, "res": {"Yes": 0.9957662828565769, "No": 0.00423322442878697}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962533881301822, "res": {"Yes": 0.9962533881301822, "No": 0.003746571964062244}, "ground_truth": 1}, {"key": "39379109", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983876508338004, "res": {"Yes": 0.9983876508338004, "No": 0.0016121799655001213}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967688143527953, "res": {"Yes": 0.9967688143527953, "No": 0.0032300502002347567}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.998159205943445, "res": {"Yes": 0.998159205943445, "No": 0.0018398331331116004}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998548794418675, "res": {"Yes": 0.998548794418675, "No": 0.0014503302352965833}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988091723985838, "res": {"Yes": 0.9988091723985838, "No": 0.001190006392529022}, "ground_truth": 1}, {"key": "14576125", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9916816882203662, "res": {"Yes": 0.9916816882203662, "No": 0.008317310779623268}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995991862411992, "res": {"Yes": 0.995991862411992, "No": 0.004007299924467946}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9991364351369416, "res": {"Yes": 0.9991364351369416, "No": 0.0008632916283811821}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998678640007302, "res": {"Yes": 0.9998678640007302, "No": 0.00013182503592861083}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.783149743163111e-06}, "ground_truth": 1}, {"key": "40814250", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.73158245712631e-06}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999673835219289, "res": {"Yes": 0.9999673835219289, "No": 3.249559552890769e-05}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9621533822668196, "res": {"Yes": 0.9621533822668196, "No": 0.0378445386049366}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9306495832901971, "res": {"Yes": 0.9306495832901971, "No": 0.06934726989828628}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861319645983618, "res": {"Yes": 0.9861319645983618, "No": 0.01386440520307161}, "ground_truth": 1}, {"key": "36334488", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9776339688319012, "res": {"Yes": 0.9776339688319012, "No": 0.02236526932312829}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9484797654926402, "res": {"Yes": 0.9484797654926402, "No": 0.051516403884535676}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6171860443357405, "res": {"Yes": 0.6171860443357405, "No": 0.38281187441176534}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.2503046507534721, "res": {"No": 0.749692842253268, "Yes": 0.2503046507534721}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6679189534345621, "res": {"Yes": 0.6679189534345621, "No": 0.33207348307276724}, "ground_truth": 1}, {"key": "36888322", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7588634162649066, "res": {"Yes": 0.7588634162649066, "No": 0.24113488819796933}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5506706389531679, "res": {"Yes": 0.5506706389531679, "No": 0.44932599855584476}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.00019949886716927022, "res": {"No": 0.9997964803229399, "Yes": 0.00019949886716927022}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 9.011463282893831e-05, "res": {"No": 0.9999050439846079, "Yes": 9.011463282893831e-05}, "ground_truth": 1}, {"key": "37318916", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.6828376820766304e-05, "res": {"No": 0.9999778731568022, "Yes": 1.6828376820766304e-05}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 7.775023378720983e-05, "res": {"No": 0.9999144602247352, "Yes": 7.775023378720983e-05}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00012191981297802573, "res": {"No": 0.9998769223246786, "Yes": 0.00012191981297802573}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9034443722687394, "res": {"Yes": 0.9034443722687394, "No": 0.09655412954219993}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9878453982327253, "res": {"Yes": 0.9878453982327253, "No": 0.012153197197313857}, "ground_truth": 1}, {"key": "39308700", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9533388329731821, "res": {"Yes": 0.9533388329731821, "No": 0.046658442160070306}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9446091546542669, "res": {"Yes": 0.9446091546542669, "No": 0.055387242030911756}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.044366180510452255, "res": {"No": 0.955633144156504, "Yes": 0.044366180510452255}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982673007595823, "res": {"Yes": 0.9982673007595823, "No": 0.001732471460589691}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975753229895662, "res": {"Yes": 0.9975753229895662, "No": 0.0024244431738260092}, "ground_truth": 1}, {"key": "31061543", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981577780777221, "res": {"Yes": 0.9981577780777221, "No": 0.0018421756267685936}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9571669646766261, "res": {"Yes": 0.9571669646766261, "No": 0.04283294198044582}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6714128201268625, "res": {"Yes": 0.6714128201268625, "No": 0.32858389024723594}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975989286942064, "res": {"Yes": 0.9975989286942064, "No": 0.0024009120331564787}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991686825959373, "res": {"Yes": 0.9991686825959373, "No": 0.0008308768242732536}, "ground_truth": 1}, {"key": "37380894", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990112662196768, "res": {"Yes": 0.9990112662196768, "No": 0.0009881356224013765}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994837969286733, "res": {"Yes": 0.9994837969286733, "No": 0.0005158709090715129}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9749054802665069, "res": {"Yes": 0.9749054802665069, "No": 0.025092609962509473}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941059457935408, "res": {"Yes": 0.9941059457935408, "No": 0.005893271515188375}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977773943539134, "res": {"Yes": 0.9977773943539134, "No": 0.002221733570987529}, "ground_truth": 1}, {"key": "38410139", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986897843829529, "res": {"Yes": 0.9986897843829529, "No": 0.0013096792749851735}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984980538517744, "res": {"Yes": 0.9984980538517744, "No": 0.0015011297315285785}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0006025881477387008, "res": {"No": 0.9993969900836017, "Yes": 0.0006025881477387008}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961087212234047, "res": {"Yes": 0.9961087212234047, "No": 0.003891226015220073}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978235598879204, "res": {"Yes": 0.9978235598879204, "No": 0.0021762370880899855}, "ground_truth": 1}, {"key": "35953842", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911780597009617, "res": {"Yes": 0.9911780597009617, "No": 0.008821698974671138}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9444620640131938, "res": {"Yes": 0.9444620640131938, "No": 0.05553748948786912}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.923679277377259, "res": {"Yes": 0.923679277377259, "No": 0.07631729464453975}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999330437956773, "res": {"Yes": 0.999330437956773, "No": 0.0006692967658549577}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.984776836901802, "res": {"Yes": 0.984776836901802, "No": 0.015221699050897484}, "ground_truth": 1}, {"key": "39815663", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997990112534311, "res": {"Yes": 0.997990112534311, "No": 0.0020094050414624443}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9847583365655342, "res": {"Yes": 0.9847583365655342, "No": 0.015240706066103674}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8115715791019736, "res": {"Yes": 0.8115715791019736, "No": 0.1884262114358161}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9582239040661916, "res": {"Yes": 0.9582239040661916, "No": 0.0417750586730571}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881439978920584, "res": {"Yes": 0.9881439978920584, "No": 0.011852813659085965}, "ground_truth": 1}, {"key": "35121432", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9640552892716844, "res": {"Yes": 0.9640552892716844, "No": 0.035943385622160755}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9720965705880353, "res": {"Yes": 0.9720965705880353, "No": 0.02789812769814504}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.864488963741371, "res": {"Yes": 0.864488963741371, "No": 0.13550908404324033}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9653556464454989, "res": {"Yes": 0.9653556464454989, "No": 0.03464355061873068}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925046535662952, "res": {"Yes": 0.9925046535662952, "No": 0.007494926155333463}, "ground_truth": 1}, {"key": "21712310", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9883359704260575, "res": {"Yes": 0.9883359704260575, "No": 0.011663482173236372}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.996473983148918, "res": {"Yes": 0.996473983148918, "No": 0.003525866702112451}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8080782508813862, "res": {"Yes": 0.8080782508813862, "No": 0.1919200307937004}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.955962888734119, "res": {"Yes": 0.955962888734119, "No": 0.04403627732964827}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931715316689633, "res": {"Yes": 0.9931715316689633, "No": 0.006827907645437605}, "ground_truth": 1}, {"key": "37952914", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939031208590271, "res": {"Yes": 0.9939031208590271, "No": 0.006096589881142306}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9340539033989649, "res": {"Yes": 0.9340539033989649, "No": 0.06594488246661409}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 8.496529142708018e-08, "res": {"No": 0.9999996871837189, "Yes": 8.496529142708018e-08}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988141656578796, "res": {"Yes": 0.9988141656578796, "No": 0.001185685251762569}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951405641487755, "res": {"Yes": 0.9951405641487755, "No": 0.004858588574589391}, "ground_truth": 1}, {"key": "38956779", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936835416271063, "res": {"Yes": 0.9936835416271063, "No": 0.006315963257337362}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926454808113944, "res": {"Yes": 0.9926454808113944, "No": 0.007352462410705437}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7240993299090188, "res": {"Yes": 0.7240993299090188, "No": 0.27590013733171376}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9840064608962164, "res": {"Yes": 0.9840064608962164, "No": 0.01599320235228726}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9695362767504827, "res": {"Yes": 0.9695362767504827, "No": 0.03046302274192375}, "ground_truth": 1}, {"key": "36101833", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9624020829454273, "res": {"Yes": 0.9624020829454273, "No": 0.03759686160582935}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9349973536671208, "res": {"Yes": 0.9349973536671208, "No": 0.06499600971310744}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.030157941223277356, "res": {"No": 0.9698412021599129, "Yes": 0.030157941223277356}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.978288750122181, "res": {"Yes": 0.978288750122181, "No": 0.021710388505922103}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9562511129239782, "res": {"Yes": 0.9562511129239782, "No": 0.04374712423982836}, "ground_truth": 1}, {"key": "35544662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9734607523570564, "res": {"Yes": 0.9734607523570564, "No": 0.02653557482603109}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198073711929692, "res": {"Yes": 0.9198073711929692, "No": 0.08018995671049868}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9986578123406379, "res": {"Yes": 0.9986578123406379, "No": 0.0013409557137806483}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932270270759636, "res": {"Yes": 0.9932270270759636, "No": 0.006771534879198234}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9885503924161192, "res": {"Yes": 0.9885503924161192, "No": 0.011448055370995908}, "ground_truth": 1}, {"key": "39759044", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9428364756767073, "res": {"Yes": 0.9428364756767073, "No": 0.05716189922516567}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.99529345764599, "res": {"Yes": 0.99529345764599, "No": 0.004705051783255217}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012858431949426305, "res": {"No": 0.9871408337742633, "Yes": 0.012858431949426305}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819176702417824, "res": {"Yes": 0.9819176702417824, "No": 0.01808158431165823}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937977349474518, "res": {"Yes": 0.9937977349474518, "No": 0.00620115756702072}, "ground_truth": 1}, {"key": "39433018", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959536755212699, "res": {"Yes": 0.9959536755212699, "No": 0.004044234469116841}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9540122260976353, "res": {"Yes": 0.9540122260976353, "No": 0.045987157787648635}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9896530884208077, "res": {"Yes": 0.9896530884208077, "No": 0.010343886604651683}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942149233737018, "res": {"Yes": 0.9942149233737018, "No": 0.005783624596605969}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967916737134868, "res": {"Yes": 0.9967916737134868, "No": 0.003207983924681282}, "ground_truth": 1}, {"key": "22111959", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953102284820452, "res": {"Yes": 0.9953102284820452, "No": 0.004688148268349019}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9906237061618695, "res": {"Yes": 0.9906237061618695, "No": 0.009375665715438077}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5921793700742747, "res": {"Yes": 0.5921793700742747, "No": 0.407819134768185}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950396308666547, "res": {"Yes": 0.9950396308666547, "No": 0.004960304247027586}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976582537757582, "res": {"Yes": 0.9976582537757582, "No": 0.0023408925282651515}, "ground_truth": 1}, {"key": "38210094", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9014204788918724, "res": {"Yes": 0.9014204788918724, "No": 0.09857880499139272}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973892313000066, "res": {"Yes": 0.9973892313000066, "No": 0.0026103927240034895}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8832794769670118, "res": {"Yes": 0.8832794769670118, "No": 0.1167167756274123}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8202693639859079, "res": {"Yes": 0.8202693639859079, "No": 0.1797300097446362}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9830403911545831, "res": {"Yes": 0.9830403911545831, "No": 0.01695865425396183}, "ground_truth": 1}, {"key": "37675935", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884756123960358, "res": {"Yes": 0.9884756123960358, "No": 0.011523113397827809}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.985685502737311, "res": {"Yes": 0.985685502737311, "No": 0.014313038126734746}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9961879698049902, "res": {"Yes": 0.9961879698049902, "No": 0.0038109430529970454}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9707921735678516, "res": {"Yes": 0.9707921735678516, "No": 0.029206330361561235}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998331689479716, "res": {"Yes": 0.998331689479716, "No": 0.0016673532334002513}, "ground_truth": 1}, {"key": "35732604", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994783095559841, "res": {"Yes": 0.994783095559841, "No": 0.005215406214930771}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854834480647168, "res": {"Yes": 0.9854834480647168, "No": 0.014511853220705272}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9642980969228472, "res": {"Yes": 0.9642980969228472, "No": 0.03570028882118491}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.991466961809549, "res": {"Yes": 0.991466961809549, "No": 0.008529316923443638}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9885845276505107, "res": {"Yes": 0.9885845276505107, "No": 0.011411343208440432}, "ground_truth": 1}, {"key": "27453212", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9908758698150474, "res": {"Yes": 0.9908758698150474, "No": 0.009122375840460547}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9949142998868468, "res": {"Yes": 0.9949142998868468, "No": 0.005081519235477668}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00018353833540499153, "res": {"No": 0.999815906757241, "Yes": 0.00018353833540499153}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9354560168811865, "res": {"Yes": 0.9354560168811865, "No": 0.06454350412392877}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4813792009043358, "res": {"No": 0.5186200888151858, "Yes": 0.4813792009043358}, "ground_truth": 1}, {"key": "39910047", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8793342340991078, "res": {"Yes": 0.8793342340991078, "No": 0.1206652495444749}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.04471847731597405, "res": {"No": 0.9552811059827547, "Yes": 0.04471847731597405}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9991589234533325, "res": {"Yes": 0.9991589234533325, "No": 0.000840777919537944}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988050088712265, "res": {"Yes": 0.9988050088712265, "No": 0.001194567600099268}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997929049571395, "res": {"Yes": 0.9997929049571395, "No": 0.00020686482444820135}, "ground_truth": 1}, {"key": "40054265", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929640335329742, "res": {"Yes": 0.9929640335329742, "No": 0.007035630282737637}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9893191695084933, "res": {"Yes": 0.9893191695084933, "No": 0.01068039992489458}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.11023564248818184, "res": {"No": 0.8897614196513683, "Yes": 0.11023564248818184}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.008123764693892529, "res": {"No": 0.9918620280474648, "Yes": 0.008123764693892529}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.08630316561910323, "res": {"No": 0.9136916296121392, "Yes": 0.08630316561910323}, "ground_truth": 1}, {"key": "19984615", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22009770886990843, "res": {"No": 0.7799005011995306, "Yes": 0.22009770886990843}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.2353144909846438, "res": {"No": 0.7646836347783852, "Yes": 0.2353144909846438}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9849665842547184, "res": {"Yes": 0.9849665842547184, "No": 0.01503331711450435}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980317891722092, "res": {"Yes": 0.9980317891722092, "No": 0.0019669935867549027}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988032266051589, "res": {"Yes": 0.9988032266051589, "No": 0.0011954411482578934}, "ground_truth": 1}, {"key": "16490806", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957942978405296, "res": {"Yes": 0.9957942978405296, "No": 0.004205578166511997}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973823578158034, "res": {"Yes": 0.9973823578158034, "No": 0.0026175330205455375}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8706344153797592, "res": {"Yes": 0.8706344153797592, "No": 0.12936482052376538}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8624441549990859, "res": {"Yes": 0.8624441549990859, "No": 0.1375518282733195}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995253906465144, "res": {"Yes": 0.995253906465144, "No": 0.004745511761232006}, "ground_truth": 1}, {"key": "36396237", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9899277770395168, "res": {"Yes": 0.9899277770395168, "No": 0.010071204164565181}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904812350002714, "res": {"Yes": 0.9904812350002714, "No": 0.009518599046394354}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.013049819196287818, "res": {"No": 0.9869447942404893, "Yes": 0.013049819196287818}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.3169832961583469, "res": {"No": 0.6830136529044063, "Yes": 0.3169832961583469}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9135499398194048, "res": {"Yes": 0.9135499398194048, "No": 0.08644497453550226}, "ground_truth": 1}, {"key": "40726444", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890887750015609, "res": {"Yes": 0.9890887750015609, "No": 0.010910430725888673}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.07409546126800273, "res": {"No": 0.9258944645494476, "Yes": 0.07409546126800273}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9993894958335514, "res": {"Yes": 0.9993894958335514, "No": 0.00060687665004602}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993993690510283, "res": {"Yes": 0.9993993690510283, "No": 0.0006003754190494597}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995394085348308, "res": {"Yes": 0.9995394085348308, "No": 0.00045999161357777033}, "ground_truth": 1}, {"key": "37314826", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988247512463821, "res": {"Yes": 0.9988247512463821, "No": 0.0011739509954172894}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993275862715041, "res": {"Yes": 0.9993275862715041, "No": 0.0006706597691722891}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7814763592953312, "res": {"Yes": 0.7814763592953312, "No": 0.21852252074208253}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928638952802983, "res": {"Yes": 0.9928638952802983, "No": 0.007135709454429812}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937740663461716, "res": {"Yes": 0.9937740663461716, "No": 0.006225873656831847}, "ground_truth": 1}, {"key": "38506971", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979300355374642, "res": {"Yes": 0.9979300355374642, "No": 0.002069880298890148}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965986386800442, "res": {"Yes": 0.9965986386800442, "No": 0.0034012339048700973}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9807352514869401, "res": {"Yes": 0.9807352514869401, "No": 0.019264279519888607}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891186330750724, "res": {"Yes": 0.9891186330750724, "No": 0.010880457378204684}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979842940693183, "res": {"Yes": 0.9979842940693183, "No": 0.0020150497380345884}, "ground_truth": 1}, {"key": "40699312", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955884237170517, "res": {"Yes": 0.9955884237170517, "No": 0.004411459464245516}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9179494180895774, "res": {"Yes": 0.9179494180895774, "No": 0.08204995581165923}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9728930943810427, "res": {"Yes": 0.9728930943810427, "No": 0.027105543901700918}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9761429696498408, "res": {"Yes": 0.9761429696498408, "No": 0.02385551145701837}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923286647478882, "res": {"Yes": 0.9923286647478882, "No": 0.007669956768956631}, "ground_truth": 1}, {"key": "34695474", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946067652936995, "res": {"Yes": 0.9946067652936995, "No": 0.005392355373476912}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9680088056510333, "res": {"Yes": 0.9680088056510333, "No": 0.03198918186687444}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8909641482279084, "res": {"Yes": 0.8909641482279084, "No": 0.10903247558302148}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9660692837819308, "res": {"Yes": 0.9660692837819308, "No": 0.033929674279874346}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984410042983071, "res": {"Yes": 0.9984410042983071, "No": 0.0015580363509533782}, "ground_truth": 1}, {"key": "36281498", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974979874641035, "res": {"Yes": 0.9974979874641035, "No": 0.0025008560142886016}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975600269851356, "res": {"Yes": 0.9975600269851356, "No": 0.002439202835059974}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5540353974289071, "res": {"Yes": 0.5540353974289071, "No": 0.4459635307162069}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.30270059418689943, "res": {"No": 0.6972980787704282, "Yes": 0.30270059418689943}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974671458018869, "res": {"Yes": 0.9974671458018869, "No": 0.0025325641283753565}, "ground_truth": 1}, {"key": "39558652", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855275661897469, "res": {"Yes": 0.9855275661897469, "No": 0.0144717616609903}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965016876033641, "res": {"Yes": 0.9965016876033641, "No": 0.0034982739124691338}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.2631994155732015, "res": {"No": 0.7367965768653901, "Yes": 0.2631994155732015}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9209930489876412, "res": {"Yes": 0.9209930489876412, "No": 0.07900569029961789}, "ground_truth": 1}, {"key": "37330579", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812380679602131, "res": {"Yes": 0.9812380679602131, "No": 0.018760781096440658}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9586310325474767, "res": {"Yes": 0.9586310325474767, "No": 0.04136566872605914}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8194469479414606, "res": {"Yes": 0.8194469479414606, "No": 0.18054982509383766}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8433964557620531, "res": {"Yes": 0.8433964557620531, "No": 0.1566029590515792}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9235928342788572, "res": {"Yes": 0.9235928342788572, "No": 0.07640481203819444}, "ground_truth": 1}, {"key": "40547658", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854078604412899, "res": {"Yes": 0.9854078604412899, "No": 0.014590451077941012}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7410592109969202, "res": {"Yes": 0.7410592109969202, "No": 0.25893927609582446}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0042833836898196285, "res": {"No": 0.9957162863879642, "Yes": 0.0042833836898196285}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988895733804221, "res": {"Yes": 0.9988895733804221, "No": 0.001109644372641271}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995989627169574, "res": {"Yes": 0.9995989627169574, "No": 0.00040061018928344225}, "ground_truth": 1}, {"key": "37119340", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957699482790103, "res": {"Yes": 0.9957699482790103, "No": 0.004229140914707421}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991752297800891, "res": {"Yes": 0.9991752297800891, "No": 0.0008245011674636563}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00028024397357216067, "res": {"No": 0.9997195153831671, "Yes": 0.00028024397357216067}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9128384702502504, "res": {"Yes": 0.9128384702502504, "No": 0.08715945689951443}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9632316805808365, "res": {"Yes": 0.9632316805808365, "No": 0.03676796975155947}, "ground_truth": 1}, {"key": "35301627", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9526227899086156, "res": {"Yes": 0.9526227899086156, "No": 0.047376038261958034}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4381029502464304, "res": {"No": 0.5618962245301569, "Yes": 0.4381029502464304}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8937048175875493, "res": {"Yes": 0.8937048175875493, "No": 0.10627181230126294}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954532638433409, "res": {"Yes": 0.9954532638433409, "No": 0.004542603378001075}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9899086201425382, "res": {"Yes": 0.9899086201425382, "No": 0.010084116669090688}, "ground_truth": 1}, {"key": "34037168", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992092651557002, "res": {"Yes": 0.9992092651557002, "No": 0.000789178994786981}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9830360087702876, "res": {"Yes": 0.9830360087702876, "No": 0.016958518585270484}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9905763188290807, "res": {"Yes": 0.9905763188290807, "No": 0.009422828217990908}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.948978295073441, "res": {"Yes": 0.948978295073441, "No": 0.05101864116511293}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857108775658187, "res": {"Yes": 0.9857108775658187, "No": 0.014288312265480383}, "ground_truth": 1}, {"key": "39703862", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821817561747189, "res": {"Yes": 0.9821817561747189, "No": 0.017816986125556582}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9644418970259396, "res": {"Yes": 0.9644418970259396, "No": 0.035556499942304635}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.022480079893087392, "res": {"No": 0.9775154848200055, "Yes": 0.022480079893087392}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.1662286547783248, "res": {"No": 0.8337551623894652, "Yes": 0.1662286547783248}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49165507766744393, "res": {"No": 0.5083427978808815, "Yes": 0.49165507766744393}, "ground_truth": 1}, {"key": "16554814", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.02733403909966203, "res": {"No": 0.972650219564393, "Yes": 0.02733403909966203}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5744694268963477, "res": {"Yes": 0.5744694268963477, "No": 0.4255095572495175}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6382491338063299, "res": {"Yes": 0.6382491338063299, "No": 0.36174986017299626}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997005711677635, "res": {"Yes": 0.9997005711677635, "No": 0.0002992186442905687}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973754729091507, "res": {"Yes": 0.9973754729091507, "No": 0.0026242844777499114}, "ground_truth": 1}, {"key": "32983099", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986794419052307, "res": {"Yes": 0.9986794419052307, "No": 0.001320330757379333}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964696100327916, "res": {"Yes": 0.9964696100327916, "No": 0.0035301860642738677}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9190345427588764, "res": {"Yes": 0.9190345427588764, "No": 0.08096319583301866}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8465114510291248, "res": {"Yes": 0.8465114510291248, "No": 0.15348403866498328}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.888137451715929, "res": {"Yes": 0.888137451715929, "No": 0.11186021136959115}, "ground_truth": 1}, {"key": "41072994", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6500526365912193, "res": {"Yes": 0.6500526365912193, "No": 0.34994611272330384}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6569954867523768, "res": {"Yes": 0.6569954867523768, "No": 0.34300344093021495}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.009876174880810483, "res": {"No": 0.9901231377723674, "Yes": 0.009876174880810483}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979466566995876, "res": {"Yes": 0.9979466566995876, "No": 0.002052968468102935}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999131909308925, "res": {"Yes": 0.999131909308925, "No": 0.0008677516515227908}, "ground_truth": 1}, {"key": "38396247", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961825293384867, "res": {"Yes": 0.9961825293384867, "No": 0.003817135525827016}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990935927754241, "res": {"Yes": 0.9990935927754241, "No": 0.000906137044604562}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0001341691152003227, "res": {"No": 0.9998653610546364, "Yes": 0.0001341691152003227}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.15366064683968478, "res": {"No": 0.8463385856482304, "Yes": 0.15366064683968478}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.12990752861235466, "res": {"No": 0.8700913089713844, "Yes": 0.12990752861235466}, "ground_truth": 1}, {"key": "37507998", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.380119728837322, "res": {"No": 0.619878095770619, "Yes": 0.380119728837322}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.024120082179950434, "res": {"No": 0.9758797345097238, "Yes": 0.024120082179950434}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9959832358650327, "res": {"Yes": 0.9959832358650327, "No": 0.004014809017349805}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9835279162106856, "res": {"Yes": 0.9835279162106856, "No": 0.01647103986285935}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952660602814828, "res": {"Yes": 0.9952660602814828, "No": 0.004730497037855374}, "ground_truth": 1}, {"key": "32593929", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961281037950864, "res": {"Yes": 0.9961281037950864, "No": 0.0038699267491027786}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9921079093764307, "res": {"Yes": 0.9921079093764307, "No": 0.007890747566554364}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.999534642612353, "res": {"Yes": 0.999534642612353, "No": 0.0004651826657241174}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988058422744736, "res": {"Yes": 0.9988058422744736, "No": 0.0011936259354370072}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994971335797217, "res": {"Yes": 0.9994971335797217, "No": 0.0005025165415828561}, "ground_truth": 1}, {"key": "36056449", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985535483210916, "res": {"Yes": 0.9985535483210916, "No": 0.0014460725263038584}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984431428613845, "res": {"Yes": 0.9984431428613845, "No": 0.0015566138902871002}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9849002076233244, "res": {"Yes": 0.9849002076233244, "No": 0.01509822638510574}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999494039541184, "res": {"Yes": 0.999494039541184, "No": 0.0005052986082371544}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985476077439928, "res": {"Yes": 0.9985476077439928, "No": 0.0014517846300395882}, "ground_truth": 1}, {"key": "21986185", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973261401483368, "res": {"Yes": 0.9973261401483368, "No": 0.0026731669122102716}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990782442780376, "res": {"Yes": 0.9990782442780376, "No": 0.0009214288453046138}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9198925069841708, "res": {"Yes": 0.9198925069841708, "No": 0.08010382285633867}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9347735420919674, "res": {"Yes": 0.9347735420919674, "No": 0.06522407693967987}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920140415590629, "res": {"Yes": 0.9920140415590629, "No": 0.007985047888705167}, "ground_truth": 1}, {"key": "40757465", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99097081018018, "res": {"Yes": 0.99097081018018, "No": 0.009027838581045467}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860064379892066, "res": {"Yes": 0.9860064379892066, "No": 0.013992968857333652}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.996651455920701, "res": {"Yes": 0.996651455920701, "No": 0.003348219724246005}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890690755166072, "res": {"Yes": 0.9890690755166072, "No": 0.010930750663131308}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966383127659535, "res": {"Yes": 0.9966383127659535, "No": 0.003361625641957482}, "ground_truth": 1}, {"key": "35198313", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910911689116504, "res": {"Yes": 0.9910911689116504, "No": 0.008908405586794165}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948763091132223, "res": {"Yes": 0.9948763091132223, "No": 0.005123386668021954}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5286158459803006, "res": {"Yes": 0.5286158459803006, "No": 0.47138363727351135}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992841291578847, "res": {"Yes": 0.9992841291578847, "No": 0.0007158434930661334}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986712387859852, "res": {"Yes": 0.9986712387859852, "No": 0.0013286976858942641}, "ground_truth": 1}, {"key": "30604618", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936585837897597, "res": {"Yes": 0.9936585837897597, "No": 0.0063411572223786615}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9532201067817597, "res": {"Yes": 0.9532201067817597, "No": 0.046779509646475134}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9188681436448608, "res": {"Yes": 0.9188681436448608, "No": 0.08112971382052774}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966081169761293, "res": {"Yes": 0.9966081169761293, "No": 0.003391677982376262}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981497019159965, "res": {"Yes": 0.9981497019159965, "No": 0.00185010478676755}, "ground_truth": 1}, {"key": "35779006", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980587295076976, "res": {"Yes": 0.9980587295076976, "No": 0.001941062646388552}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999472962393137, "res": {"Yes": 0.999472962393137, "No": 0.0005267565871974147}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.990413761754918, "res": {"Yes": 0.990413761754918, "No": 0.009585027820354584}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917774724383486, "res": {"Yes": 0.9917774724383486, "No": 0.008222212052278492}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9700003351202252, "res": {"Yes": 0.9700003351202252, "No": 0.02999662210978277}, "ground_truth": 1}, {"key": "33858956", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986246436706222, "res": {"Yes": 0.9986246436706222, "No": 0.0013749901658476208}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929830640696312, "res": {"Yes": 0.9929830640696312, "No": 0.007016844243358694}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4715978826711563, "res": {"No": 0.5283983653093967, "Yes": 0.4715978826711563}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952377263558086, "res": {"Yes": 0.9952377263558086, "No": 0.004761845384739093}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963493651295132, "res": {"Yes": 0.9963493651295132, "No": 0.0036499149975550388}, "ground_truth": 1}, {"key": "38633880", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973610047862678, "res": {"Yes": 0.9973610047862678, "No": 0.0026386311502146894}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924748329217251, "res": {"Yes": 0.9924748329217251, "No": 0.007522800525816286}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9459652653437235, "res": {"Yes": 0.9459652653437235, "No": 0.0540338811751395}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9709374547492361, "res": {"Yes": 0.9709374547492361, "No": 0.029061872129911848}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9509476359930338, "res": {"Yes": 0.9509476359930338, "No": 0.04905131421282178}, "ground_truth": 1}, {"key": "36654905", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960553760493027, "res": {"Yes": 0.9960553760493027, "No": 0.003944130320754486}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.990992699976167, "res": {"Yes": 0.990992699976167, "No": 0.009007056079382625}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.30771895387422077, "res": {"No": 0.6922792252874269, "Yes": 0.30771895387422077}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.990210564554434, "res": {"Yes": 0.990210564554434, "No": 0.009788973302524545}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995447629318528, "res": {"Yes": 0.9995447629318528, "No": 0.0004546730357860512}, "ground_truth": 1}, {"key": "26547482", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9951828210031077, "res": {"Yes": 0.9951828210031077, "No": 0.004816590337996984}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977359769750191, "res": {"Yes": 0.9977359769750191, "No": 0.002263058640364208}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9368357927190325, "res": {"Yes": 0.9368357927190325, "No": 0.06316231104108075}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7084697971834981, "res": {"Yes": 0.7084697971834981, "No": 0.29152506226267444}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9575136195072854, "res": {"Yes": 0.9575136195072854, "No": 0.04248420156678667}, "ground_truth": 1}, {"key": "36439068", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9858676091830819, "res": {"Yes": 0.9858676091830819, "No": 0.014131371839286528}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.983611640620883, "res": {"Yes": 0.983611640620883, "No": 0.016386825338748405}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6648201407141938, "res": {"Yes": 0.6648201407141938, "No": 0.33515059863814844}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9294954936282072, "res": {"Yes": 0.9294954936282072, "No": 0.07049946202810854}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9760602752965359, "res": {"Yes": 0.9760602752965359, "No": 0.023930506772456152}, "ground_truth": 1}, {"key": "30501258", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9340015885049279, "res": {"Yes": 0.9340015885049279, "No": 0.06598774077776744}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.48399072774532254, "res": {"No": 0.5159725048988617, "Yes": 0.48399072774532254}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.252652588979517, "res": {"No": 0.7473443969514506, "Yes": 0.252652588979517}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9720366465699019, "res": {"Yes": 0.9720366465699019, "No": 0.027962674826735198}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975954874826372, "res": {"Yes": 0.9975954874826372, "No": 0.0024041376625326583}, "ground_truth": 1}, {"key": "37560941", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999627549516908, "res": {"Yes": 0.999627549516908, "No": 0.0003719106205123101}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795367883576288, "res": {"Yes": 0.9795367883576288, "No": 0.020462920228044384}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8102262575600937, "res": {"Yes": 0.8102262575600937, "No": 0.18977160667861306}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.924415207043001, "res": {"Yes": 0.924415207043001, "No": 0.07558276484714505}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9847511709975684, "res": {"Yes": 0.9847511709975684, "No": 0.015246062591301655}, "ground_truth": 1}, {"key": "36801665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9662898480831817, "res": {"Yes": 0.9662898480831817, "No": 0.03370955670752092}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9016007720012544, "res": {"Yes": 0.9016007720012544, "No": 0.09839559915092952}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.33378773900160785, "res": {"No": 0.6662109945375904, "Yes": 0.33378773900160785}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990888326652866, "res": {"Yes": 0.9990888326652866, "No": 0.0009108493883529863}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980174233066199, "res": {"Yes": 0.9980174233066199, "No": 0.001982566621951966}, "ground_truth": 1}, {"key": "34954610", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948214422028948, "res": {"Yes": 0.9948214422028948, "No": 0.0051768164323344815}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938006750030198, "res": {"Yes": 0.9938006750030198, "No": 0.006198340401130831}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9892403001709548, "res": {"Yes": 0.9892403001709548, "No": 0.010757812975205927}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891170089436103, "res": {"Yes": 0.9891170089436103, "No": 0.010880903486102345}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.976909743964968, "res": {"Yes": 0.976909743964968, "No": 0.02308893224185637}, "ground_truth": 1}, {"key": "37020510", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967245161401084, "res": {"Yes": 0.9967245161401084, "No": 0.003275206036386666}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9915480562171585, "res": {"Yes": 0.9915480562171585, "No": 0.008451536667538059}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9972646126084879, "res": {"Yes": 0.9972646126084879, "No": 0.0027335217030626316}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988416437131842, "res": {"Yes": 0.9988416437131842, "No": 0.001158175218459612}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995645382410059, "res": {"Yes": 0.9995645382410059, "No": 0.0004351187829056067}, "ground_truth": 1}, {"key": "38064637", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996500562246114, "res": {"Yes": 0.9996500562246114, "No": 0.0003496255966153458}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956256362223244, "res": {"Yes": 0.9956256362223244, "No": 0.0043740208059758375}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0037503429119268394, "res": {"No": 0.9962487784764197, "Yes": 0.0037503429119268394}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.990367800689942, "res": {"Yes": 0.990367800689942, "No": 0.009631278497907802}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920667268397746, "res": {"Yes": 0.9920667268397746, "No": 0.007933005590087767}, "ground_truth": 1}, {"key": "40886108", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909617904049148, "res": {"Yes": 0.9909617904049148, "No": 0.009038025017155803}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980519591992691, "res": {"Yes": 0.9980519591992691, "No": 0.0019477099132892725}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9547011246164018, "res": {"Yes": 0.9547011246164018, "No": 0.045298593297981256}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.98656001803861, "res": {"Yes": 0.98656001803861, "No": 0.013439367580722783}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969327649065199, "res": {"Yes": 0.9969327649065199, "No": 0.0030670419993255858}, "ground_truth": 1}, {"key": "38554603", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9205235138367597, "res": {"Yes": 0.9205235138367597, "No": 0.07947430635995135}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974478123828076, "res": {"Yes": 0.9974478123828076, "No": 0.0025515907264728145}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8861829321444441, "res": {"Yes": 0.8861829321444441, "No": 0.11381585996466653}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904412372041793, "res": {"Yes": 0.9904412372041793, "No": 0.009558564692101043}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958849710308616, "res": {"Yes": 0.9958849710308616, "No": 0.004114756995245932}, "ground_truth": 1}, {"key": "39115586", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999531988512079, "res": {"Yes": 0.9999531988512079, "No": 4.666911318520462e-05}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985358495157216, "res": {"Yes": 0.9985358495157216, "No": 0.0014638292540383809}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8448866353282408, "res": {"Yes": 0.8448866353282408, "No": 0.1551075549188188}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.995960884658446, "res": {"Yes": 0.995960884658446, "No": 0.004037972780303185}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990032697124905, "res": {"Yes": 0.990032697124905, "No": 0.009965411815296827}, "ground_truth": 1}, {"key": "38786314", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911409368131184, "res": {"Yes": 0.9911409368131184, "No": 0.008857683540148894}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9783625794684397, "res": {"Yes": 0.9783625794684397, "No": 0.02163150850754168}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9154305847230064, "res": {"Yes": 0.9154305847230064, "No": 0.08456822279602356}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946382577278922, "res": {"Yes": 0.9946382577278922, "No": 0.005360896823919903}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.991227957851616, "res": {"Yes": 0.991227957851616, "No": 0.008771875863275587}, "ground_truth": 1}, {"key": "38721078", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929778920315132, "res": {"Yes": 0.9929778920315132, "No": 0.007021252205498765}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4781155879157807, "res": {"No": 0.5218801579128696, "Yes": 0.4781155879157807}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.991996804472833, "res": {"Yes": 0.991996804472833, "No": 0.00800296404738137}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981130027222517, "res": {"Yes": 0.9981130027222517, "No": 0.0018867924651814476}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6131928574784801, "res": {"Yes": 0.6131928574784801, "No": 0.3868066976558657}, "ground_truth": 1}, {"key": "39475467", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998675636144865, "res": {"Yes": 0.998675636144865, "No": 0.0013242906783266309}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.993250905136651, "res": {"Yes": 0.993250905136651, "No": 0.006748925310720922}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.08671863341550609, "res": {"No": 0.9132803051576949, "Yes": 0.08671863341550609}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965482183429849, "res": {"Yes": 0.9965482183429849, "No": 0.0034517487234326737}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970996056942804, "res": {"Yes": 0.9970996056942804, "No": 0.002900007371779526}, "ground_truth": 1}, {"key": "35691234", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968076671648386, "res": {"Yes": 0.9968076671648386, "No": 0.003191982761314033}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992877027042486, "res": {"Yes": 0.9992877027042486, "No": 0.000712169082018413}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8237066744969169, "res": {"Yes": 0.8237066744969169, "No": 0.176292858157317}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9107018436305634, "res": {"Yes": 0.9107018436305634, "No": 0.08929640941738892}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960608160205942, "res": {"Yes": 0.9960608160205942, "No": 0.003938439894304738}, "ground_truth": 1}, {"key": "36871390", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970715205858082, "res": {"Yes": 0.9970715205858082, "No": 0.0029280157611240552}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820127421490416, "res": {"Yes": 0.9820127421490416, "No": 0.01798619067940653}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.960168281166967, "res": {"Yes": 0.960168281166967, "No": 0.03982987580183294}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908276470416778, "res": {"Yes": 0.9908276470416778, "No": 0.009170990346659365}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993936543718529, "res": {"Yes": 0.9993936543718529, "No": 0.0006058082749035496}, "ground_truth": 1}, {"key": "31730844", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989585639713053, "res": {"Yes": 0.9989585639713053, "No": 0.001040638928645997}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9796466074439903, "res": {"Yes": 0.9796466074439903, "No": 0.020350927311480683}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4279180815358727, "res": {"No": 0.5720783908796644, "Yes": 0.4279180815358727}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8818477472868608, "res": {"Yes": 0.8818477472868608, "No": 0.118144302394894}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9900902989028433, "res": {"Yes": 0.9900902989028433, "No": 0.009906596803125162}, "ground_truth": 1}, {"key": "30810940", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9859753792755873, "res": {"Yes": 0.9859753792755873, "No": 0.014021646007823818}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923427472815927, "res": {"Yes": 0.9923427472815927, "No": 0.007652062956355394}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9489012145950777, "res": {"Yes": 0.9489012145950777, "No": 0.051097847803897305}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4377510977979075, "res": {"No": 0.5622443708614189, "Yes": 0.4377510977979075}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.964904713505363, "res": {"Yes": 0.964904713505363, "No": 0.03509440369889326}, "ground_truth": 1}, {"key": "39352003", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9539093814913994, "res": {"Yes": 0.9539093814913994, "No": 0.046089851233229694}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9896619636695012, "res": {"Yes": 0.9896619636695012, "No": 0.0103375498127317}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0061620257498666104, "res": {"No": 0.993837173599302, "Yes": 0.0061620257498666104}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986516257745188, "res": {"Yes": 0.9986516257745188, "No": 0.001347690893876796}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991947448920517, "res": {"Yes": 0.9991947448920517, "No": 0.0008048983997513959}, "ground_truth": 1}, {"key": "40118123", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989005170747584, "res": {"Yes": 0.9989005170747584, "No": 0.0010989775845417563}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929745993222826, "res": {"Yes": 0.9929745993222826, "No": 0.007024979372960636}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9844977099601244, "res": {"Yes": 0.9844977099601244, "No": 0.015501786556335355}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968418943398721, "res": {"Yes": 0.9968418943398721, "No": 0.003157828408885456}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996730510894477, "res": {"Yes": 0.9996730510894477, "No": 0.0003265981636731175}, "ground_truth": 1}, {"key": "37114191", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999315050358871, "res": {"Yes": 0.9999315050358871, "No": 6.841694705925567e-05}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997674046544927, "res": {"Yes": 0.9997674046544927, "No": 0.00023188639104700998}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 5.26910471579283e-05, "res": {"No": 0.9999467621731833, "Yes": 5.26910471579283e-05}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9472513915902983, "res": {"Yes": 0.9472513915902983, "No": 0.052739546335477186}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9599712719692971, "res": {"Yes": 0.9599712719692971, "No": 0.04002639369874105}, "ground_truth": 1}, {"key": "39268203", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9643451008505325, "res": {"Yes": 0.9643451008505325, "No": 0.035653027958726984}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.803559271402987, "res": {"Yes": 0.803559271402987, "No": 0.19643898594222853}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.725973765852229, "res": {"Yes": 0.725973765852229, "No": 0.2740192005634831}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9813053390418202, "res": {"Yes": 0.9813053390418202, "No": 0.018690561783391967}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981600388078339, "res": {"Yes": 0.9981600388078339, "No": 0.0018383331769734955}, "ground_truth": 1}, {"key": "37950968", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9814329955429215, "res": {"Yes": 0.9814329955429215, "No": 0.018562313678341862}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9184284096894234, "res": {"Yes": 0.9184284096894234, "No": 0.0815692766135008}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9778851533062393, "res": {"Yes": 0.9778851533062393, "No": 0.02211452160601616}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932777219818719, "res": {"Yes": 0.9932777219818719, "No": 0.00672205074913531}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993332968953459, "res": {"Yes": 0.9993332968953459, "No": 0.0006662547187218214}, "ground_truth": 1}, {"key": "34959807", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984729728941653, "res": {"Yes": 0.9984729728941653, "No": 0.001526936253273229}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997321438498659, "res": {"Yes": 0.9997321438498659, "No": 0.0002669868905054382}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0007226725087802583, "res": {"No": 0.9992763901319859, "Yes": 0.0007226725087802583}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986616070478028, "res": {"Yes": 0.9986616070478028, "No": 0.0013380887354577362}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992630673683275, "res": {"Yes": 0.9992630673683275, "No": 0.0007357504200035166}, "ground_truth": 1}, {"key": "35631314", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993538946160332, "res": {"Yes": 0.9993538946160332, "No": 0.0006455456692413534}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993355602379265, "res": {"Yes": 0.9993355602379265, "No": 0.000663975371692679}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9631788073553927, "res": {"Yes": 0.9631788073553927, "No": 0.03682026890396663}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9842025978487937, "res": {"Yes": 0.9842025978487937, "No": 0.015796583444844827}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993041252993285, "res": {"Yes": 0.993041252993285, "No": 0.006958528147915659}, "ground_truth": 1}, {"key": "38082365", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8811539444519187, "res": {"Yes": 0.8811539444519187, "No": 0.11884520867974102}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9602487399186438, "res": {"Yes": 0.9602487399186438, "No": 0.039749882255975776}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9918651950679765, "res": {"Yes": 0.9918651950679765, "No": 0.008133432034787754}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929877689338331, "res": {"Yes": 0.9929877689338331, "No": 0.007011255448737054}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9711118944764378, "res": {"Yes": 0.9711118944764378, "No": 0.028887364326755673}, "ground_truth": 1}, {"key": "37242829", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9860941718150275, "res": {"Yes": 0.9860941718150275, "No": 0.01390338421185992}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9719573598023988, "res": {"Yes": 0.9719573598023988, "No": 0.0280409314149828}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3660050675176434, "res": {"No": 0.6339942144878714, "Yes": 0.3660050675176434}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989065751250944, "res": {"Yes": 0.9989065751250944, "No": 0.0010932684407371134}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980633658006822, "res": {"Yes": 0.9980633658006822, "No": 0.001936213533462427}, "ground_truth": 1}, {"key": "38556068", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993853261893952, "res": {"Yes": 0.9993853261893952, "No": 0.0006143723379575013}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991201294433053, "res": {"Yes": 0.9991201294433053, "No": 0.0008790122711709809}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.01559801525600307, "res": {"No": 0.9844012467091681, "Yes": 0.01559801525600307}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990097181530182, "res": {"Yes": 0.9990097181530182, "No": 0.0009899224017285986}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962209772470494, "res": {"Yes": 0.9962209772470494, "No": 0.003778355563943633}, "ground_truth": 1}, {"key": "32969336", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976507835382534, "res": {"Yes": 0.9976507835382534, "No": 0.002348290646421545}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996482724706356, "res": {"Yes": 0.9996482724706356, "No": 0.00035131882972529253}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9682571830737736, "res": {"Yes": 0.9682571830737736, "No": 0.03174135735469684}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966335712704594, "res": {"Yes": 0.9966335712704594, "No": 0.0033658568915389936}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973627845785689, "res": {"Yes": 0.9973627845785689, "No": 0.0026360903170804142}, "ground_truth": 1}, {"key": "36825153", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998380711318946, "res": {"Yes": 0.9998380711318946, "No": 0.00016184669466702008}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9548525006606212, "res": {"Yes": 0.9548525006606212, "No": 0.045146361016883886}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6776227038049232, "res": {"Yes": 0.6776227038049232, "No": 0.32237410211615547}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9801183052114736, "res": {"Yes": 0.9801183052114736, "No": 0.019879967749769894}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969641587138849, "res": {"Yes": 0.9969641587138849, "No": 0.0030346566227446668}, "ground_truth": 1}, {"key": "41050146", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9622480762893143, "res": {"Yes": 0.9622480762893143, "No": 0.03775121946878447}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9646608289642696, "res": {"Yes": 0.9646608289642696, "No": 0.03533819106557951}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9281776605137247, "res": {"Yes": 0.9281776605137247, "No": 0.07182090423272149}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999748228499765, "res": {"Yes": 0.999748228499765, "No": 0.0002515759816840126}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996609000373932, "res": {"Yes": 0.9996609000373932, "No": 0.00033894354444936615}, "ground_truth": 1}, {"key": "27865037", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9848023833290764, "res": {"Yes": 0.9848023833290764, "No": 0.015197149524484662}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995053509192148, "res": {"Yes": 0.9995053509192148, "No": 0.0004943920658438647}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8224614308711277, "res": {"Yes": 0.8224614308711277, "No": 0.17753489461633948}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9510592143717722, "res": {"Yes": 0.9510592143717722, "No": 0.048940059754191384}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8756258510123527, "res": {"Yes": 0.8756258510123527, "No": 0.12436605570735788}, "ground_truth": 1}, {"key": "39868565", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7532030219944544, "res": {"Yes": 0.7532030219944544, "No": 0.24679330859019585}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6116608785912165, "res": {"Yes": 0.6116608785912165, "No": 0.3883180339423105}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9819041100523898, "res": {"Yes": 0.9819041100523898, "No": 0.018093478671938612}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9541817261954867, "res": {"Yes": 0.9541817261954867, "No": 0.04581723264523225}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958894674616565, "res": {"Yes": 0.9958894674616565, "No": 0.004110086107350108}, "ground_truth": 1}, {"key": "37761968", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991367924781516, "res": {"Yes": 0.9991367924781516, "No": 0.0008629265332454415}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898078179308898, "res": {"Yes": 0.9898078179308898, "No": 0.01019047315049158}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8966307066198372, "res": {"Yes": 0.8966307066198372, "No": 0.10336777699021192}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8244664657790312, "res": {"Yes": 0.8244664657790312, "No": 0.17553166938844017}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8999543762484865, "res": {"Yes": 0.8999543762484865, "No": 0.10004422907757797}, "ground_truth": 1}, {"key": "16326139", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8118805869835346, "res": {"Yes": 0.8118805869835346, "No": 0.18811721996536498}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920886656459484, "res": {"Yes": 0.9920886656459484, "No": 0.007910156597422453}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9668057042845639, "res": {"Yes": 0.9668057042845639, "No": 0.033189716334588136}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9793088786241496, "res": {"Yes": 0.9793088786241496, "No": 0.02068171091356107}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834778696824859, "res": {"Yes": 0.9834778696824859, "No": 0.016517827942695386}, "ground_truth": 1}, {"key": "36568381", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9328493029412782, "res": {"Yes": 0.9328493029412782, "No": 0.06713600444496091}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9868558607044396, "res": {"Yes": 0.9868558607044396, "No": 0.013140050798361527}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9604621472233696, "res": {"Yes": 0.9604621472233696, "No": 0.03953668910804333}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939655335735761, "res": {"Yes": 0.9939655335735761, "No": 0.006034127549520025}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968950880400442, "res": {"Yes": 0.9968950880400442, "No": 0.0031046518359890314}, "ground_truth": 1}, {"key": "39855613", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934454706562351, "res": {"Yes": 0.9934454706562351, "No": 0.006554205331552307}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871077285872484, "res": {"Yes": 0.9871077285872484, "No": 0.012891939601494649}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8944567931453831, "res": {"Yes": 0.8944567931453831, "No": 0.10554076527380161}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7838670986730659, "res": {"Yes": 0.7838670986730659, "No": 0.21613140495467265}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9418966469864232, "res": {"Yes": 0.9418966469864232, "No": 0.05810163351168326}, "ground_truth": 1}, {"key": "29856302", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9810650124520001, "res": {"Yes": 0.9810650124520001, "No": 0.018933239460644187}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9513768764887455, "res": {"Yes": 0.9513768764887455, "No": 0.04862066172728113}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9593178480943277, "res": {"Yes": 0.9593178480943277, "No": 0.04068130616634527}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993635366287932, "res": {"Yes": 0.9993635366287932, "No": 0.0006361959147497568}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9908917923273286, "res": {"Yes": 0.9908917923273286, "No": 0.009107156918572402}, "ground_truth": 1}, {"key": "35641106", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891626092779421, "res": {"Yes": 0.9891626092779421, "No": 0.010837282568069168}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985812500785947, "res": {"Yes": 0.9985812500785947, "No": 0.0014181373411874803}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.997464893822854, "res": {"Yes": 0.997464893822854, "No": 0.0025348021305380947}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947163345243405, "res": {"Yes": 0.9947163345243405, "No": 0.00528299398160961}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988205876540845, "res": {"Yes": 0.9988205876540845, "No": 0.001179111606388422}, "ground_truth": 1}, {"key": "39474558", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9913170990249484, "res": {"Yes": 0.9913170990249484, "No": 0.008681412787652436}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7859958865213672, "res": {"Yes": 0.7859958865213672, "No": 0.214002389100837}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.721288812343285, "res": {"Yes": 0.721288812343285, "No": 0.2787102966207437}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9843827608117299, "res": {"Yes": 0.9843827608117299, "No": 0.015616363065467317}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998547488717125, "res": {"Yes": 0.998547488717125, "No": 0.001452238674079488}, "ground_truth": 1}, {"key": "34338135", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9860275298785092, "res": {"Yes": 0.9860275298785092, "No": 0.013971775779667812}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9045049966045701, "res": {"Yes": 0.9045049966045701, "No": 0.09549427317953188}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9981881799312278, "res": {"Yes": 0.9981881799312278, "No": 0.0018111338379916979}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9668236425640141, "res": {"Yes": 0.9668236425640141, "No": 0.03317602839993073}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9930012868662396, "res": {"Yes": 0.9930012868662396, "No": 0.006998472976069287}, "ground_truth": 1}, {"key": "35870330", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991826148087583, "res": {"Yes": 0.991826148087583, "No": 0.00817379478482467}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.992714672002444, "res": {"Yes": 0.992714672002444, "No": 0.007284576311876359}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9982317834401778, "res": {"Yes": 0.9982317834401778, "No": 0.0017678520506981728}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988895733804221, "res": {"Yes": 0.9988895733804221, "No": 0.0011103303448071293}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991489224720983, "res": {"Yes": 0.9991489224720983, "No": 0.0008506997600876918}, "ground_truth": 1}, {"key": "24478245", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991811776681166, "res": {"Yes": 0.9991811776681166, "No": 0.000818707811730346}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988316495534654, "res": {"Yes": 0.9988316495534654, "No": 0.0011683030121630439}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.008233462795418828, "res": {"No": 0.9917322206482184, "Yes": 0.008233462795418828}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5898889028897161, "res": {"Yes": 0.5898889028897161, "No": 0.41007034534037684}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0374470553855286, "res": {"No": 0.9625209950684204, "Yes": 0.0374470553855286}, "ground_truth": 1}, {"key": "38485946", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5678316074344915, "res": {"Yes": 0.5678316074344915, "No": 0.43215986557139424}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.05454952360073204, "res": {"No": 0.9454161046697533, "Yes": 0.05454952360073204}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9604937056465956, "res": {"Yes": 0.9604937056465956, "No": 0.039505878316821626}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983952565798948, "res": {"Yes": 0.9983952565798948, "No": 0.0016045290375640967}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992898468781727, "res": {"Yes": 0.9992898468781727, "No": 0.0007098893110586237}, "ground_truth": 1}, {"key": "32509613", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947487678786429, "res": {"Yes": 0.9947487678786429, "No": 0.005250678356309354}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8188441551401947, "res": {"Yes": 0.8188441551401947, "No": 0.18115425012613362}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7192786386240744, "res": {"Yes": 0.7192786386240744, "No": 0.28072105205833026}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998708456660929, "res": {"Yes": 0.998708456660929, "No": 0.0012914573107262934}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974417705604721, "res": {"Yes": 0.9974417705604721, "No": 0.0025581140275030088}, "ground_truth": 1}, {"key": "34078819", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943459764967767, "res": {"Yes": 0.9943459764967767, "No": 0.005653903994259619}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998849006744357, "res": {"Yes": 0.9998849006744357, "No": 0.00011503616508950716}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9938999367960893, "res": {"Yes": 0.9938999367960893, "No": 0.006099407766348616}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945736332066081, "res": {"Yes": 0.9945736332066081, "No": 0.005424877285477139}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957117980077761, "res": {"Yes": 0.9957117980077761, "No": 0.004287157727488804}, "ground_truth": 1}, {"key": "39523865", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994179465188867, "res": {"Yes": 0.9994179465188867, "No": 0.0005814464381791647}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991170329150514, "res": {"Yes": 0.9991170329150514, "No": 0.0008823047853754397}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0483646066658674, "res": {"No": 0.9516339665915436, "Yes": 0.0483646066658674}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8902270379214222, "res": {"Yes": 0.8902270379214222, "No": 0.10977117578382228}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6403653539264952, "res": {"Yes": 0.6403653539264952, "No": 0.35963332797226216}, "ground_truth": 1}, {"key": "33146158", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909099377039682, "res": {"Yes": 0.9909099377039682, "No": 0.009088930813391113}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.960703588765386, "res": {"Yes": 0.960703588765386, "No": 0.039295196265811974}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7625312253960284, "res": {"Yes": 0.7625312253960284, "No": 0.23746524572612757}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9882530693168957, "res": {"Yes": 0.9882530693168957, "No": 0.01174550106262077}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9891795221248231, "res": {"Yes": 0.9891795221248231, "No": 0.010818632498624433}, "ground_truth": 1}, {"key": "32083974", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9846049472907323, "res": {"Yes": 0.9846049472907323, "No": 0.015394387254333789}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9496194249187445, "res": {"Yes": 0.9496194249187445, "No": 0.05037889205812851}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7955823951598263, "res": {"Yes": 0.7955823951598263, "No": 0.2044113282952968}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6668811865788019, "res": {"Yes": 0.6668811865788019, "No": 0.3331168665399566}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953405819255999, "res": {"Yes": 0.9953405819255999, "No": 0.004659104577774777}, "ground_truth": 1}, {"key": "34378482", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9878135367810831, "res": {"Yes": 0.9878135367810831, "No": 0.0121857627062314}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5907841835060762, "res": {"Yes": 0.5907841835060762, "No": 0.40921176258404557}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5670447807374981, "res": {"Yes": 0.5670447807374981, "No": 0.43295272806478596}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9681952766251105, "res": {"Yes": 0.9681952766251105, "No": 0.031804413802719964}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9779902728088523, "res": {"Yes": 0.9779902728088523, "No": 0.022008996002496933}, "ground_truth": 1}, {"key": "38080102", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962733927005557, "res": {"Yes": 0.9962733927005557, "No": 0.0037262358886136094}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9807819248073826, "res": {"Yes": 0.9807819248073826, "No": 0.01921499529165939}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.023103471300391178, "res": {"No": 0.9768949517096174, "Yes": 0.023103471300391178}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8890182099897599, "res": {"Yes": 0.8890182099897599, "No": 0.11097989724193227}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9645820666584781, "res": {"Yes": 0.9645820666584781, "No": 0.03541681753652395}, "ground_truth": 1}, {"key": "40244537", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9467788329531105, "res": {"Yes": 0.9467788329531105, "No": 0.05321978783530556}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.969854876047416, "res": {"Yes": 0.969854876047416, "No": 0.03014306797965906}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6202640962161207, "res": {"Yes": 0.6202640962161207, "No": 0.37973303979674555}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7585172410477532, "res": {"Yes": 0.7585172410477532, "No": 0.24147952309048953}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7909149255759891, "res": {"Yes": 0.7909149255759891, "No": 0.20908289057971222}, "ground_truth": 1}, {"key": "33497789", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8163901445166072, "res": {"Yes": 0.8163901445166072, "No": 0.18360886824026895}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8779287930023324, "res": {"Yes": 0.8779287930023324, "No": 0.1220692710368099}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9977547284016279, "res": {"Yes": 0.9977547284016279, "No": 0.002244983741292753}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968045887284737, "res": {"Yes": 0.9968045887284737, "No": 0.0031952376885735364}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997754256463753, "res": {"Yes": 0.997754256463753, "No": 0.0022454193103874013}, "ground_truth": 1}, {"key": "28816889", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992417605089564, "res": {"Yes": 0.9992417605089564, "No": 0.0007575792542356579}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999458907543352, "res": {"Yes": 0.999458907543352, "No": 0.0005410231439977619}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 3.2575945599111248e-06, "res": {"No": 0.9999938462231346, "Yes": 3.2575945599111248e-06}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7523094026170863, "res": {"Yes": 0.7523094026170863, "No": 0.24768247398404206}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982336763889036, "res": {"Yes": 0.9982336763889036, "No": 0.00176438604318097}, "ground_truth": 1}, {"key": "38157127", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956990337613384, "res": {"Yes": 0.9956990337613384, "No": 0.0043002548095647285}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9410130558979198, "res": {"Yes": 0.9410130558979198, "No": 0.05898373801104158}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9725118621309174, "res": {"Yes": 0.9725118621309174, "No": 0.027487192776712716}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9651377420052029, "res": {"Yes": 0.9651377420052029, "No": 0.03485429175512156}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9710073608786788, "res": {"Yes": 0.9710073608786788, "No": 0.028991656421368358}, "ground_truth": 1}, {"key": "36183569", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9935925587616485, "res": {"Yes": 0.9935925587616485, "No": 0.006405888714622501}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.992836164085406, "res": {"Yes": 0.992836164085406, "No": 0.0071628881961113364}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9156426229995128, "res": {"Yes": 0.9156426229995128, "No": 0.08435645049030131}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989492829900884, "res": {"Yes": 0.9989492829900884, "No": 0.0010503001484447323}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974013294055385, "res": {"Yes": 0.9974013294055385, "No": 0.002597777518712932}, "ground_truth": 1}, {"key": "36012016", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9878593595503959, "res": {"Yes": 0.9878593595503959, "No": 0.012139465771033425}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957000984628844, "res": {"Yes": 0.9957000984628844, "No": 0.004299274463231873}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5516027762445319, "res": {"Yes": 0.5516027762445319, "No": 0.4483900222174192}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9294491408437563, "res": {"Yes": 0.9294491408437563, "No": 0.07054841974590617}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8743012177217716, "res": {"Yes": 0.8743012177217716, "No": 0.12569440187227382}, "ground_truth": 1}, {"key": "34571973", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9836027625822814, "res": {"Yes": 0.9836027625822814, "No": 0.0163957438364725}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854804443157448, "res": {"Yes": 0.9854804443157448, "No": 0.014518068526856456}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9814992592284868, "res": {"Yes": 0.9814992592284868, "No": 0.018500509624629628}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.074668624138367, "res": {"No": 0.9253293692356175, "Yes": 0.074668624138367}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8274850403675581, "res": {"Yes": 0.8274850403675581, "No": 0.17251460145104267}, "ground_truth": 1}, {"key": "38707498", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5698604002451273, "res": {"Yes": 0.5698604002451273, "No": 0.4301367919802269}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9658467154683058, "res": {"Yes": 0.9658467154683058, "No": 0.03415192928926142}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9979144889065413, "res": {"Yes": 0.9979144889065413, "No": 0.0020846270763345024}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9903591444180102, "res": {"Yes": 0.9903591444180102, "No": 0.00963953893329376}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9502256892221516, "res": {"Yes": 0.9502256892221516, "No": 0.04977378694984335}, "ground_truth": 1}, {"key": "35459082", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960944087380684, "res": {"Yes": 0.9960944087380684, "No": 0.003904940627622715}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905464984731562, "res": {"Yes": 0.9905464984731562, "No": 0.009452641998404367}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5868119665661855, "res": {"Yes": 0.5868119665661855, "No": 0.41317314359040475}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9540512889279411, "res": {"Yes": 0.9540512889279411, "No": 0.04594590282946595}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813560738393319, "res": {"Yes": 0.9813560738393319, "No": 0.018642465670731453}, "ground_truth": 1}, {"key": "39464041", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9648298021763692, "res": {"Yes": 0.9648298021763692, "No": 0.03516967406340328}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9834022460923918, "res": {"Yes": 0.9834022460923918, "No": 0.01659658379965307}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.933402968652079, "res": {"Yes": 0.933402968652079, "No": 0.06659489207151176}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954622423745451, "res": {"Yes": 0.9954622423745451, "No": 0.004535855814448786}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9559343536677782, "res": {"Yes": 0.9559343536677782, "No": 0.04406301987049199}, "ground_truth": 1}, {"key": "23782052", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898664489537978, "res": {"Yes": 0.9898664489537978, "No": 0.010132698593492947}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9417809581009358, "res": {"Yes": 0.9417809581009358, "No": 0.058213406414896085}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.99225787434761, "res": {"Yes": 0.99225787434761, "No": 0.0077418360790275295}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979864318537643, "res": {"Yes": 0.9979864318537643, "No": 0.0020131572865136837}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988479432275276, "res": {"Yes": 0.9988479432275276, "No": 0.0011517370476630743}, "ground_truth": 1}, {"key": "36568455", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970798039912877, "res": {"Yes": 0.9970798039912877, "No": 0.0029195627998230876}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952253281595348, "res": {"Yes": 0.9952253281595348, "No": 0.004774064215818575}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0009646941981069051, "res": {"No": 0.9990349316183154, "Yes": 0.0009646941981069051}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956892192043337, "res": {"Yes": 0.9956892192043337, "No": 0.004310098591802164}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9885185882959636, "res": {"Yes": 0.9885185882959636, "No": 0.011481096255620271}, "ground_truth": 1}, {"key": "38469552", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909798221097049, "res": {"Yes": 0.9909798221097049, "No": 0.00901976228019917}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9552592369827199, "res": {"Yes": 0.9552592369827199, "No": 0.04474031868058263}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0925383431327066, "res": {"No": 0.9074610625904943, "Yes": 0.0925383431327066}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6175888834677696, "res": {"Yes": 0.6175888834677696, "No": 0.38241057649085086}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9896157227945596, "res": {"Yes": 0.9896157227945596, "No": 0.010383741639122012}, "ground_truth": 1}, {"key": "35922277", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976633567110109, "res": {"Yes": 0.9976633567110109, "No": 0.002336444382204264}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970954528827807, "res": {"Yes": 0.9970954528827807, "No": 0.002904268580204111}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8494066008005072, "res": {"Yes": 0.8494066008005072, "No": 0.15059121479687237}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8139634872043254, "res": {"Yes": 0.8139634872043254, "No": 0.18603249801872696}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7792444987752261, "res": {"Yes": 0.7792444987752261, "No": 0.22075223830554605}, "ground_truth": 1}, {"key": "32744293", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9339422065717023, "res": {"Yes": 0.9339422065717023, "No": 0.06605607344552801}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.32001836314713966, "res": {"No": 0.6799800292454135, "Yes": 0.32001836314713966}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.12497973185334824, "res": {"No": 0.875019401261545, "Yes": 0.12497973185334824}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6060671139212975, "res": {"Yes": 0.6060671139212975, "No": 0.3939318319533155}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7476999110059602, "res": {"Yes": 0.7476999110059602, "No": 0.25229783611673146}, "ground_truth": 1}, {"key": "30972362", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7573958445472383, "res": {"Yes": 0.7573958445472383, "No": 0.2426024217598402}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9594991206516429, "res": {"Yes": 0.9594991206516429, "No": 0.04050006499727272}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.14728362932691816, "res": {"No": 0.8527150700999137, "Yes": 0.14728362932691816}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952975957862014, "res": {"Yes": 0.9952975957862014, "No": 0.004701896222664991}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942060788770838, "res": {"Yes": 0.9942060788770838, "No": 0.005793397916689963}, "ground_truth": 1}, {"key": "36380943", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.979742261869431, "res": {"Yes": 0.979742261869431, "No": 0.020256632665940626}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8793117498102048, "res": {"Yes": 0.8793117498102048, "No": 0.1206865367719728}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9208792120726914, "res": {"Yes": 0.9208792120726914, "No": 0.07911897869933854}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908695510197731, "res": {"Yes": 0.9908695510197731, "No": 0.009129457551461244}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929060824670266, "res": {"Yes": 0.9929060824670266, "No": 0.0070927254832465715}, "ground_truth": 1}, {"key": "36929355", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9795634379980063, "res": {"Yes": 0.9795634379980063, "No": 0.020435471276707355}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9642155247194166, "res": {"Yes": 0.9642155247194166, "No": 0.03578407918476891}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.658628273672537, "res": {"Yes": 0.658628273672537, "No": 0.34134530138981534}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5067374599423125, "res": {"Yes": 0.5067374599423125, "No": 0.49319324699990086}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.529203914369992, "res": {"Yes": 0.529203914369992, "No": 0.47076989970326666}, "ground_truth": 1}, {"key": "39127206", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5002643601416333, "res": {"Yes": 0.5002643601416333, "No": 0.4996521613278579}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.013267387800705976, "res": {"No": 0.9866754829004334, "Yes": 0.013267387800705976}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9271665814924109, "res": {"Yes": 0.9271665814924109, "No": 0.07282463413806974}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897566659846367, "res": {"Yes": 0.9897566659846367, "No": 0.010241542868177593}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5918938308274122, "res": {"Yes": 0.5918938308274122, "No": 0.4081001210303888}, "ground_truth": 1}, {"key": "36128318", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9643102934423077, "res": {"Yes": 0.9643102934423077, "No": 0.03568671458265128}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9040608851755667, "res": {"Yes": 0.9040608851755667, "No": 0.09593286068986118}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8296748930869594, "res": {"Yes": 0.8296748930869594, "No": 0.17032389328665917}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9827342777769493, "res": {"Yes": 0.9827342777769493, "No": 0.0172648851560222}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9906852475992625, "res": {"Yes": 0.9906852475992625, "No": 0.009314608544685297}, "ground_truth": 1}, {"key": "39863480", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9675058544358714, "res": {"Yes": 0.9675058544358714, "No": 0.03249364978397866}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9439429056204265, "res": {"Yes": 0.9439429056204265, "No": 0.05605595447772029}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00353840577258641, "res": {"No": 0.9964606155400975, "Yes": 0.00353840577258641}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.231703903608783, "res": {"No": 0.7682932982317267, "Yes": 0.231703903608783}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3268750798107739, "res": {"No": 0.6731212110381566, "Yes": 0.3268750798107739}, "ground_truth": 1}, {"key": "38634057", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8906489488052961, "res": {"Yes": 0.8906489488052961, "No": 0.10934955514972587}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5157073786498244, "res": {"Yes": 0.5157073786498244, "No": 0.4842904059052133}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7362998340434959, "res": {"Yes": 0.7362998340434959, "No": 0.2636980712058117}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.950455121352022, "res": {"Yes": 0.950455121352022, "No": 0.04954367377099497}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9245708916721448, "res": {"Yes": 0.9245708916721448, "No": 0.07542871054396655}, "ground_truth": 1}, {"key": "33131935", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7253754689343178, "res": {"Yes": 0.7253754689343178, "No": 0.27462391476031506}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923134001711424, "res": {"Yes": 0.9923134001711424, "No": 0.007686036791736336}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9882300205593442, "res": {"Yes": 0.9882300205593442, "No": 0.011769536950354906}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998947619740917, "res": {"Yes": 0.998947619740917, "No": 0.001052042990507673}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997049803968442, "res": {"Yes": 0.9997049803968442, "No": 0.000294756490038118}, "ground_truth": 1}, {"key": "39021319", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984423096609294, "res": {"Yes": 0.9984423096609294, "No": 0.001556446874577226}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952474067816808, "res": {"Yes": 0.9952474067816808, "No": 0.004752219532843192}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9447872444348265, "res": {"Yes": 0.9447872444348265, "No": 0.05521118055072583}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953743626660987, "res": {"Yes": 0.9953743626660987, "No": 0.004624909850460688}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939324434214336, "res": {"Yes": 0.9939324434214336, "No": 0.006067327808999015}, "ground_truth": 1}, {"key": "40644571", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971981108390321, "res": {"Yes": 0.9971981108390321, "No": 0.002801715418303209}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942465036209673, "res": {"Yes": 0.9942465036209673, "No": 0.0057526710591982696}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9787183855908317, "res": {"Yes": 0.9787183855908317, "No": 0.02128130694087715}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983903843230917, "res": {"Yes": 0.9983903843230917, "No": 0.0016093762458372612}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975137618225036, "res": {"Yes": 0.9975137618225036, "No": 0.002486179709226676}, "ground_truth": 1}, {"key": "14681877", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993735397082391, "res": {"Yes": 0.9993735397082391, "No": 0.0006261954594853281}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9695820001898854, "res": {"Yes": 0.9695820001898854, "No": 0.030417548008059347}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9803347960861167, "res": {"Yes": 0.9803347960861167, "No": 0.01966522168672888}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.473510211779683, "res": {"No": 0.5264865993809092, "Yes": 0.473510211779683}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928802255435115, "res": {"Yes": 0.9928802255435115, "No": 0.007119728402515028}, "ground_truth": 1}, {"key": "36570890", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9221969283501458, "res": {"Yes": 0.9221969283501458, "No": 0.07780112404678813}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8361135163564742, "res": {"Yes": 0.8361135163564742, "No": 0.16388608437155655}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0993219783094467, "res": {"No": 0.9006776135530372, "Yes": 0.0993219783094467}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9849499256557528, "res": {"Yes": 0.9849499256557528, "No": 0.015049763533055868}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880785785020803, "res": {"Yes": 0.9880785785020803, "No": 0.011921083784018245}, "ground_truth": 1}, {"key": "30452755", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8974731459629405, "res": {"Yes": 0.8974731459629405, "No": 0.1025254724059424}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9878137610147814, "res": {"Yes": 0.9878137610147814, "No": 0.012184840278908855}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.780472869531844, "res": {"Yes": 0.780472869531844, "No": 0.21952611030232028}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972107861053355, "res": {"Yes": 0.9972107861053355, "No": 0.002788869395127408}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995870508569776, "res": {"Yes": 0.9995870508569776, "No": 0.00041257522713689283}, "ground_truth": 1}, {"key": "37347053", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993773947391423, "res": {"Yes": 0.993773947391423, "No": 0.00622516083510796}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9655328827030917, "res": {"Yes": 0.9655328827030917, "No": 0.03446653588576477}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.003056456623974265, "res": {"No": 0.9969427144448555, "Yes": 0.003056456623974265}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973031458624657, "res": {"Yes": 0.9973031458624657, "No": 0.0026967956690624783}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931696486175244, "res": {"Yes": 0.9931696486175244, "No": 0.006829954276031214}, "ground_truth": 1}, {"key": "38890979", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977382331579349, "res": {"Yes": 0.9977382331579349, "No": 0.002261655978877176}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945978026341381, "res": {"Yes": 0.9945978026341381, "No": 0.005402189200863702}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8967355557782488, "res": {"Yes": 0.8967355557782488, "No": 0.10326398586445905}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9057691742121461, "res": {"Yes": 0.9057691742121461, "No": 0.09422959262230259}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923551876659534, "res": {"Yes": 0.9923551876659534, "No": 0.007644572959177946}, "ground_truth": 1}, {"key": "32974694", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939967429909458, "res": {"Yes": 0.9939967429909458, "No": 0.006002383131675594}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877480409881193, "res": {"Yes": 0.9877480409881193, "No": 0.012250671832816818}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07256295130561564, "res": {"No": 0.927429498303902, "Yes": 0.07256295130561564}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9462616050992539, "res": {"Yes": 0.9462616050992539, "No": 0.053723755552880614}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8919043438400449, "res": {"Yes": 0.8919043438400449, "No": 0.10808435412619045}, "ground_truth": 1}, {"key": "38519940", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961596952038694, "res": {"Yes": 0.9961596952038694, "No": 0.00383971397820148}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9935946691544846, "res": {"Yes": 0.9935946691544846, "No": 0.006404389271591062}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.2569568815687985e-06, "res": {"No": 0.9999951574563252, "Yes": 4.2569568815687985e-06}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.049708859609784835, "res": {"No": 0.9502902792075718, "Yes": 0.049708859609784835}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4369934380076889, "res": {"No": 0.5630017457770041, "Yes": 0.4369934380076889}, "ground_truth": 1}, {"key": "38870104", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.205971040535417, "res": {"No": 0.7940197769878901, "Yes": 0.205971040535417}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.2253954418667432, "res": {"No": 0.7746022323241072, "Yes": 0.2253954418667432}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9720213254490246, "res": {"Yes": 0.9720213254490246, "No": 0.027977596893862738}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923456901776078, "res": {"Yes": 0.9923456901776078, "No": 0.007652640708918479}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9900857464781149, "res": {"Yes": 0.9900857464781149, "No": 0.009913283039336376}, "ground_truth": 1}, {"key": "34283161", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934051190122174, "res": {"Yes": 0.9934051190122174, "No": 0.006593888788726977}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.85358629043588, "res": {"Yes": 0.85358629043588, "No": 0.14641292438985887}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9893502019695783, "res": {"Yes": 0.9893502019695783, "No": 0.010649003066368172}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9827494807938216, "res": {"Yes": 0.9827494807938216, "No": 0.017249812588475317}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999124889792476, "res": {"Yes": 0.999124889792476, "No": 0.0008745847112383095}, "ground_truth": 1}, {"key": "31650463", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993359232007407, "res": {"Yes": 0.993359232007407, "No": 0.006639417800404109}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955451782863566, "res": {"Yes": 0.9955451782863566, "No": 0.004454286001176557}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.21967286863567026, "res": {"No": 0.7803245391505564, "Yes": 0.21967286863567026}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9391737073779141, "res": {"Yes": 0.9391737073779141, "No": 0.06082453714252201}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32485826482695573, "res": {"No": 0.6751284701980574, "Yes": 0.32485826482695573}, "ground_truth": 1}, {"key": "35589432", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8257965857618536, "res": {"Yes": 0.8257965857618536, "No": 0.1741979469891004}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198895780510914, "res": {"Yes": 0.9198895780510914, "No": 0.08010480012893886}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9901019889680145, "res": {"Yes": 0.9901019889680145, "No": 0.009895685746544501}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908330371587388, "res": {"Yes": 0.9908330371587388, "No": 0.009165027473874324}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905356228275809, "res": {"Yes": 0.9905356228275809, "No": 0.009461269578571822}, "ground_truth": 1}, {"key": "14412752", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974165076752379, "res": {"Yes": 0.9974165076752379, "No": 0.0025813896526419486}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.993982141286041, "res": {"Yes": 0.993982141286041, "No": 0.006013916973789731}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9926727289067574, "res": {"Yes": 0.9926727289067574, "No": 0.007325172315905497}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99178415704104, "res": {"Yes": 0.99178415704104, "No": 0.008215367444571593}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935348929191552, "res": {"Yes": 0.9935348929191552, "No": 0.006463988738192061}, "ground_truth": 1}, {"key": "37271183", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943628347813694, "res": {"Yes": 0.9943628347813694, "No": 0.005636601308428694}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905614711919137, "res": {"Yes": 0.9905614711919137, "No": 0.009437366596283882}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9754742509627947, "res": {"Yes": 0.9754742509627947, "No": 0.024524611858912134}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.979495035512373, "res": {"Yes": 0.979495035512373, "No": 0.020504709243993485}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883101218251106, "res": {"Yes": 0.9883101218251106, "No": 0.011689678570312612}, "ground_truth": 1}, {"key": "35588153", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961012701588159, "res": {"Yes": 0.9961012701588159, "No": 0.0038986575618981216}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942502709275315, "res": {"Yes": 0.9942502709275315, "No": 0.005749467713632852}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8656957587386755, "res": {"Yes": 0.8656957587386755, "No": 0.13430265586959095}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988736326284726, "res": {"Yes": 0.9988736326284726, "No": 0.0011260526409024344}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952792860619273, "res": {"Yes": 0.9952792860619273, "No": 0.004720464845584135}, "ground_truth": 1}, {"key": "39876692", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969126208872033, "res": {"Yes": 0.9969126208872033, "No": 0.0030863565308707707}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931467156019423, "res": {"Yes": 0.9931467156019423, "No": 0.006852315874341461}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5900546613605315, "res": {"Yes": 0.5900546613605315, "No": 0.4099431161527798}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959041260666078, "res": {"Yes": 0.9959041260666078, "No": 0.004094715659939904}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9653736533923133, "res": {"Yes": 0.9653736533923133, "No": 0.034623459722944484}, "ground_truth": 1}, {"key": "38992323", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984804603712155, "res": {"Yes": 0.9984804603712155, "No": 0.001518415381634823}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9716198469341671, "res": {"Yes": 0.9716198469341671, "No": 0.028366809410104193}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3452576255148779, "res": {"No": 0.6547416556535502, "Yes": 0.3452576255148779}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894336284852129, "res": {"Yes": 0.9894336284852129, "No": 0.01056573564452305}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997651403038574, "res": {"Yes": 0.9997651403038574, "No": 0.00023430543874028062}, "ground_truth": 1}, {"key": "37556002", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9846474880664605, "res": {"Yes": 0.9846474880664605, "No": 0.015352184536397574}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998035119263106, "res": {"Yes": 0.9998035119263106, "No": 0.00019620549580240743}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 3.857068958909621e-06, "res": {"No": 0.9999958726752174, "Yes": 3.857068958909621e-06}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933978202929397, "res": {"Yes": 0.9933978202929397, "No": 0.006602067641475108}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981308203961188, "res": {"Yes": 0.9981308203961188, "No": 0.0018691163498221613}, "ground_truth": 1}, {"key": "39875801", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958882838477278, "res": {"Yes": 0.9958882838477278, "No": 0.004111569705125364}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985710240598055, "res": {"Yes": 0.9985710240598055, "No": 0.00142892337912339}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.45591453968648066, "res": {"No": 0.5440765092563533, "Yes": 0.45591453968648066}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9670410872802537, "res": {"Yes": 0.9670410872802537, "No": 0.03295830504067386}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9428482404636511, "res": {"Yes": 0.9428482404636511, "No": 0.05715105664133425}, "ground_truth": 1}, {"key": "39272285", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99779638423399, "res": {"Yes": 0.99779638423399, "No": 0.0022033177200985747}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.968034730241999, "res": {"Yes": 0.968034730241999, "No": 0.03196392998197324}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9828323860885447, "res": {"Yes": 0.9828323860885447, "No": 0.017167435063529475}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846888766241629, "res": {"Yes": 0.9846888766241629, "No": 0.01531072064813513}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9328140325759244, "res": {"Yes": 0.9328140325759244, "No": 0.06718521973929965}, "ground_truth": 1}, {"key": "39629714", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984490783058606, "res": {"Yes": 0.984490783058606, "No": 0.015509012072285071}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.3508598229845429, "res": {"No": 0.6491396034110242, "Yes": 0.3508598229845429}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3876556396961745, "res": {"No": 0.6123436404811861, "Yes": 0.3876556396961745}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9251452655148136, "res": {"Yes": 0.9251452655148136, "No": 0.07485023713389564}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9177485743730235, "res": {"Yes": 0.9177485743730235, "No": 0.08225102725435478}, "ground_truth": 1}, {"key": "34043257", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9613200163108235, "res": {"Yes": 0.9613200163108235, "No": 0.03867891343205944}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.718849206471653, "res": {"Yes": 0.718849206471653, "No": 0.28115018112923823}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.13871111748096573, "res": {"No": 0.8612871694705382, "Yes": 0.13871111748096573}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996233789595161, "res": {"Yes": 0.9996233789595161, "No": 0.0003763232787753148}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993992498726607, "res": {"Yes": 0.9993992498726607, "No": 0.000600309021199982}, "ground_truth": 1}, {"key": "33995240", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980161181002872, "res": {"Yes": 0.9980161181002872, "No": 0.0019831765532475026}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9130536717229991, "res": {"Yes": 0.9130536717229991, "No": 0.08694586459825582}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8886652633845434, "res": {"Yes": 0.8886652633845434, "No": 0.11133266251567575}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.979537590598587, "res": {"Yes": 0.979537590598587, "No": 0.02046059168021931}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898733404280047, "res": {"Yes": 0.9898733404280047, "No": 0.010126318016422626}, "ground_truth": 1}, {"key": "39399948", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9712757261311031, "res": {"Yes": 0.9712757261311031, "No": 0.028721026321854297}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9207667188144348, "res": {"Yes": 0.9207667188144348, "No": 0.07922798473174539}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8812628882482934, "res": {"Yes": 0.8812628882482934, "No": 0.11873438968412137}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756788283868383, "res": {"Yes": 0.9756788283868383, "No": 0.02432011726850576}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871035551045947, "res": {"Yes": 0.9871035551045947, "No": 0.01289533062751022}, "ground_truth": 1}, {"key": "33185890", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998542017091297, "res": {"Yes": 0.998542017091297, "No": 0.0014576807991334127}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871740614939475, "res": {"Yes": 0.9871740614939475, "No": 0.012825506442641136}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9851575459234033, "res": {"Yes": 0.9851575459234033, "No": 0.014842018989374473}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991216776910486, "res": {"Yes": 0.9991216776910486, "No": 0.0008775830177295681}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881664576722297, "res": {"Yes": 0.9881664576722297, "No": 0.01183216036697006}, "ground_truth": 1}, {"key": "35280425", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999006338731407, "res": {"Yes": 0.9999006338731407, "No": 9.875426156752712e-05}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924217597641942, "res": {"Yes": 0.9924217597641942, "No": 0.007576946476510332}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9993332968953459, "res": {"Yes": 0.9993332968953459, "No": 0.0006664096409529359}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965969828327816, "res": {"Yes": 0.9965969828327816, "No": 0.0034029400514538856}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982551851623549, "res": {"Yes": 0.9982551851623549, "No": 0.0017447669242740041}, "ground_truth": 1}, {"key": "16365170", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997853591336609, "res": {"Yes": 0.997853591336609, "No": 0.002146136564569293}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986987056187739, "res": {"Yes": 0.9986987056187739, "No": 0.0013009975013691503}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9460319487855079, "res": {"Yes": 0.9460319487855079, "No": 0.05396743337474689}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923889984778216, "res": {"Yes": 0.9923889984778216, "No": 0.007610414049359524}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955426934086926, "res": {"Yes": 0.9955426934086926, "No": 0.004455921915331644}, "ground_truth": 1}, {"key": "24388238", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9851997849660885, "res": {"Yes": 0.9851997849660885, "No": 0.014799181556169511}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952829493931373, "res": {"Yes": 0.9952829493931373, "No": 0.00471670130392323}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.17153898850362526, "res": {"No": 0.8284588594641923, "Yes": 0.17153898850362526}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7548942241588136, "res": {"Yes": 0.7548942241588136, "No": 0.2451044580291642}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928173665984104, "res": {"Yes": 0.9928173665984104, "No": 0.007181442577619119}, "ground_truth": 1}, {"key": "35024827", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980384403776247, "res": {"Yes": 0.9980384403776247, "No": 0.0019608263253880465}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.03174062096374837, "res": {"No": 0.9682567434851123, "Yes": 0.03174062096374837}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.31751275126222933, "res": {"No": 0.6824759163123987, "Yes": 0.31751275126222933}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9072234568742961, "res": {"Yes": 0.9072234568742961, "No": 0.09276607288011746}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929290024791173, "res": {"Yes": 0.9929290024791173, "No": 0.007067983916958785}, "ground_truth": 1}, {"key": "38624944", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8242674141386759, "res": {"Yes": 0.8242674141386759, "No": 0.17573111637676397}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.977637500052174, "res": {"Yes": 0.977637500052174, "No": 0.022354089483659505}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6094497610226906, "res": {"Yes": 0.6094497610226906, "No": 0.3905473547848413}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9780902793830766, "res": {"Yes": 0.9780902793830766, "No": 0.021908880983537955}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9510966364348982, "res": {"Yes": 0.9510966364348982, "No": 0.04890223392533934}, "ground_truth": 1}, {"key": "34719830", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8394448487306537, "res": {"Yes": 0.8394448487306537, "No": 0.1605534612459133}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5322983385896561, "res": {"Yes": 0.5322983385896561, "No": 0.46770075498547303}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7586070927363165, "res": {"Yes": 0.7586070927363165, "No": 0.24139021633320143}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9453842560284808, "res": {"Yes": 0.9453842560284808, "No": 0.05461431232974966}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9636171823374238, "res": {"Yes": 0.9636171823374238, "No": 0.036380647784123976}, "ground_truth": 1}, {"key": "38995225", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984423311325989, "res": {"Yes": 0.984423311325989, "No": 0.015575112363262075}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.17132829658345905, "res": {"No": 0.8286700771173051, "Yes": 0.17132829658345905}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9322970200751501, "res": {"Yes": 0.9322970200751501, "No": 0.06770044675810065}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974148430882173, "res": {"Yes": 0.9974148430882173, "No": 0.002584933491446811}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834431663111909, "res": {"Yes": 0.9834431663111909, "No": 0.01655635935346349}, "ground_truth": 1}, {"key": "34242311", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987467438822084, "res": {"Yes": 0.9987467438822084, "No": 0.0012532325822277818}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994660559388924, "res": {"Yes": 0.9994660559388924, "No": 0.0005335522787444684}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.30539947660519706, "res": {"No": 0.6945952827597028, "Yes": 0.30539947660519706}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.579164940874949, "res": {"Yes": 0.579164940874949, "No": 0.420833012027514}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7150722660477816, "res": {"Yes": 0.7150722660477816, "No": 0.2849160794912991}, "ground_truth": 1}, {"key": "39253748", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9074642114858449, "res": {"Yes": 0.9074642114858449, "No": 0.09253405870660478}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9216287258354103, "res": {"Yes": 0.9216287258354103, "No": 0.07836690271558976}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00011843455836733544, "res": {"No": 0.9998773990761076, "Yes": 0.00011843455836733544}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9492073339466982, "res": {"Yes": 0.9492073339466982, "No": 0.050789938755879595}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.948678318262645, "res": {"Yes": 0.948678318262645, "No": 0.05131945802438671}, "ground_truth": 1}, {"key": "37131104", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6462456657188604, "res": {"Yes": 0.6462456657188604, "No": 0.3537488786230113}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9721483954097142, "res": {"Yes": 0.9721483954097142, "No": 0.02784920942237758}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9109412621825819, "res": {"Yes": 0.9109412621825819, "No": 0.0890568810922428}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9785676706502269, "res": {"Yes": 0.9785676706502269, "No": 0.02143199242892936}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960997302870526, "res": {"Yes": 0.9960997302870526, "No": 0.0039000808316391423}, "ground_truth": 1}, {"key": "38490554", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9919671233804926, "res": {"Yes": 0.9919671233804926, "No": 0.008032543913590786}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955204821034391, "res": {"Yes": 0.9955204821034391, "No": 0.004478710987324329}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9131980953649429, "res": {"Yes": 0.9131980953649429, "No": 0.08679721873957404}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977419126237757, "res": {"Yes": 0.9977419126237757, "No": 0.00225757850067443}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982722877161663, "res": {"Yes": 0.9982722877161663, "No": 0.0017274782376491085}, "ground_truth": 1}, {"key": "29009500", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898003429304744, "res": {"Yes": 0.9898003429304744, "No": 0.010198914356437189}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982981782436822, "res": {"Yes": 0.9982981782436822, "No": 0.001701168348762098}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9124320701206114, "res": {"Yes": 0.9124320701206114, "No": 0.08756511420738328}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.921039584258045, "res": {"Yes": 0.921039584258045, "No": 0.07895725886092284}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8879638200561537, "res": {"Yes": 0.8879638200561537, "No": 0.11203135404886372}, "ground_truth": 1}, {"key": "36703057", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9060024814822752, "res": {"Yes": 0.9060024814822752, "No": 0.09398855118820613}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6940049055392181, "res": {"Yes": 0.6940049055392181, "No": 0.30599316924734704}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6765284483508225, "res": {"Yes": 0.6765284483508225, "No": 0.3234671551083029}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9149354743768277, "res": {"Yes": 0.9149354743768277, "No": 0.08505875721074374}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897114727475692, "res": {"Yes": 0.9897114727475692, "No": 0.010287201800342916}, "ground_truth": 1}, {"key": "34876987", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898402930683238, "res": {"Yes": 0.9898402930683238, "No": 0.010157783703976976}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8678080494185337, "res": {"Yes": 0.8678080494185337, "No": 0.13218814900805306}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9821914208907501, "res": {"Yes": 0.9821914208907501, "No": 0.017807912464082004}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9832964278703413, "res": {"Yes": 0.9832964278703413, "No": 0.016703526736981448}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989824772277291, "res": {"Yes": 0.9989824772277291, "No": 0.0010172707226947185}, "ground_truth": 1}, {"key": "36209258", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967708306187922, "res": {"Yes": 0.9967708306187922, "No": 0.003228947649312548}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950058792967964, "res": {"Yes": 0.9950058792967964, "No": 0.004993855611187391}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6899743886350062, "res": {"Yes": 0.6899743886350062, "No": 0.3100209819759508}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996238556100456, "res": {"Yes": 0.9996238556100456, "No": 0.0003760507313020858}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979899934740977, "res": {"Yes": 0.9979899934740977, "No": 0.002009733807019516}, "ground_truth": 1}, {"key": "36854437", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932247958932761, "res": {"Yes": 0.9932247958932761, "No": 0.006774850356918569}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.991199378187928, "res": {"Yes": 0.991199378187928, "No": 0.008799840347430444}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.020993260947969675, "res": {"No": 0.9790045058105113, "Yes": 0.020993260947969675}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9452272303465351, "res": {"Yes": 0.9452272303465351, "No": 0.05477151058764605}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8913839402095455, "res": {"Yes": 0.8913839402095455, "No": 0.1086150607672033}, "ground_truth": 1}, {"key": "38047723", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985005674414242, "res": {"Yes": 0.985005674414242, "No": 0.014992162716346189}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9533883525849071, "res": {"Yes": 0.9533883525849071, "No": 0.04660891253601497}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9217350602752942, "res": {"Yes": 0.9217350602752942, "No": 0.07826331533598384}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9874957184135564, "res": {"Yes": 0.9874957184135564, "No": 0.01250335838449259}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9775694705808653, "res": {"Yes": 0.9775694705808653, "No": 0.02242957900294492}, "ground_truth": 1}, {"key": "34287816", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945972207945938, "res": {"Yes": 0.9945972207945938, "No": 0.005402495513675278}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9026259650969065, "res": {"Yes": 0.9026259650969065, "No": 0.09737358561806762}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7184509306138605, "res": {"Yes": 0.7184509306138605, "No": 0.28154699511214415}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981543386315762, "res": {"Yes": 0.9981543386315762, "No": 0.0018453701468731636}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9822650212157756, "res": {"Yes": 0.9822650212157756, "No": 0.01773413438919253}, "ground_truth": 1}, {"key": "33235855", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995342851788288, "res": {"Yes": 0.9995342851788288, "No": 0.00046564579938871705}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9700807689426725, "res": {"Yes": 0.9700807689426725, "No": 0.029917969275123364}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7405900467751272, "res": {"Yes": 0.7405900467751272, "No": 0.2594078877216072}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7963614605817557, "res": {"Yes": 0.7963614605817557, "No": 0.2036381570557505}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975730672747961, "res": {"Yes": 0.9975730672747961, "No": 0.0024265470481533713}, "ground_truth": 1}, {"key": "34381016", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9612524878742144, "res": {"Yes": 0.9612524878742144, "No": 0.03874693589448422}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9689663666822464, "res": {"Yes": 0.9689663666822464, "No": 0.031031622927632566}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8933360259738989, "res": {"Yes": 0.8933360259738989, "No": 0.10666243974844361}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987726686879029, "res": {"Yes": 0.9987726686879029, "No": 0.001226126825799322}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992654497142464, "res": {"Yes": 0.9992654497142464, "No": 0.0007343333332085266}, "ground_truth": 1}, {"key": "28064995", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910237284433514, "res": {"Yes": 0.9910237284433514, "No": 0.008975882686257963}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984627517783522, "res": {"Yes": 0.9984627517783522, "No": 0.0015365740248120831}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.005304945925965981, "res": {"No": 0.9946935631418299, "Yes": 0.005304945925965981}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8632307434484461, "res": {"Yes": 0.8632307434484461, "No": 0.1367534102702683}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7945885384557251, "res": {"Yes": 0.7945885384557251, "No": 0.2054105540287078}, "ground_truth": 1}, {"key": "37576197", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9758084403165732, "res": {"Yes": 0.9758084403165732, "No": 0.02419120842615609}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8652375201278, "res": {"Yes": 0.8652375201278, "No": 0.13475589802783586}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9815695429807824, "res": {"Yes": 0.9815695429807824, "No": 0.018429143159465033}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935759576577875, "res": {"Yes": 0.9935759576577875, "No": 0.006423400041435034}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9749951132969288, "res": {"Yes": 0.9749951132969288, "No": 0.02500136889032425}, "ground_truth": 1}, {"key": "34454741", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9737247167119973, "res": {"Yes": 0.9737247167119973, "No": 0.026274000576315484}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936268035193057, "res": {"Yes": 0.9936268035193057, "No": 0.0063728914218932654}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.996590478165162, "res": {"Yes": 0.996590478165162, "No": 0.0034089464308029965}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960398840224892, "res": {"Yes": 0.9960398840224892, "No": 0.003959430805625131}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936776521815764, "res": {"Yes": 0.9936776521815764, "No": 0.00632195362949309}, "ground_truth": 1}, {"key": "34766970", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971823413725158, "res": {"Yes": 0.9971823413725158, "No": 0.0028171684337129003}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978556094975383, "res": {"Yes": 0.9978556094975383, "No": 0.002144086910460039}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8332833238289259, "res": {"Yes": 0.8332833238289259, "No": 0.1667148351639474}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9732920107481021, "res": {"Yes": 0.9732920107481021, "No": 0.026707047422391734}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9660662715826, "res": {"Yes": 0.9660662715826, "No": 0.03393202568777729}, "ground_truth": 1}, {"key": "35574186", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769544581472255, "res": {"Yes": 0.9769544581472255, "No": 0.023044411272518942}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7708150634614243, "res": {"Yes": 0.7708150634614243, "No": 0.22918361144318974}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.3223914796919244e-05, "res": {"No": 0.9999756083404814, "Yes": 2.3223914796919244e-05}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9827144906214742, "res": {"Yes": 0.9827144906214742, "No": 0.01728239371545525}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9015681301362071, "res": {"Yes": 0.9015681301362071, "No": 0.09842358683898864}, "ground_truth": 1}, {"key": "35486470", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9609490039774973, "res": {"Yes": 0.9609490039774973, "No": 0.0390468371833876}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9716417970460658, "res": {"Yes": 0.9716417970460658, "No": 0.028354792938826347}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00426018113351641, "res": {"No": 0.9957393367897919, "Yes": 0.00426018113351641}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9232116575307694, "res": {"Yes": 0.9232116575307694, "No": 0.07678518201172377}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6828219100801564, "res": {"Yes": 0.6828219100801564, "No": 0.3171675803978867}, "ground_truth": 1}, {"key": "40977702", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5635434168130631, "res": {"Yes": 0.5635434168130631, "No": 0.43643383323157786}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5694660850609392, "res": {"Yes": 0.5694660850609392, "No": 0.4305218075790564}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.991118701154683e-05, "res": {"No": 0.9999697675220106, "Yes": 2.991118701154683e-05}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969280222095587, "res": {"Yes": 0.9969280222095587, "No": 0.0030715468019087036}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976706002721082, "res": {"Yes": 0.9976706002721082, "No": 0.0023293328101148974}, "ground_truth": 1}, {"key": "35336618", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989083612715383, "res": {"Yes": 0.9989083612715383, "No": 0.0010912325955857104}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9887452054642699, "res": {"Yes": 0.9887452054642699, "No": 0.011253152132746616}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9911876662452697, "res": {"Yes": 0.9911876662452697, "No": 0.008811390671638221}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985914725073165, "res": {"Yes": 0.9985914725073165, "No": 0.0014082429308695371}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993353581398823, "res": {"Yes": 0.993353581398823, "No": 0.006645943569384108}, "ground_truth": 1}, {"key": "33024679", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928273461508897, "res": {"Yes": 0.9928273461508897, "No": 0.00717239750648735}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994203291740881, "res": {"Yes": 0.9994203291740881, "No": 0.0005794085593244784}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9968794498694564, "res": {"Yes": 0.9968794498694564, "No": 0.0031204554819857016}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992259295871956, "res": {"Yes": 0.9992259295871956, "No": 0.0007734465436774766}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8914961056502826, "res": {"Yes": 0.8914961056502826, "No": 0.10850283253814906}, "ground_truth": 1}, {"key": "37451334", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981937688024932, "res": {"Yes": 0.9981937688024932, "No": 0.0018058181882733356}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9419145149352934, "res": {"Yes": 0.9419145149352934, "No": 0.05808479228938135}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5023403002545765, "res": {"Yes": 0.5023403002545765, "No": 0.49765779340437394}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995672787708485, "res": {"Yes": 0.9995672787708485, "No": 0.00043244085170858823}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996039599145867, "res": {"Yes": 0.9996039599145867, "No": 0.000395774204494804}, "ground_truth": 1}, {"key": "33354824", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998833512079994, "res": {"Yes": 0.9998833512079994, "No": 0.00011627761422324946}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994188996042717, "res": {"Yes": 0.9994188996042717, "No": 0.0005807509212524442}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04055843926902882, "res": {"No": 0.9594388736743928, "Yes": 0.04055843926902882}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.23483824992862087, "res": {"No": 0.76515898746267, "Yes": 0.23483824992862087}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3526902097976243, "res": {"No": 0.6473049281924346, "Yes": 0.3526902097976243}, "ground_truth": 1}, {"key": "34688538", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34556016914870813, "res": {"No": 0.6544335412766416, "Yes": 0.34556016914870813}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4024221970753068, "res": {"No": 0.5975727770497985, "Yes": 0.4024221970753068}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03482029056165003, "res": {"No": 0.9651750549515634, "Yes": 0.03482029056165003}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9297191385552338, "res": {"Yes": 0.9297191385552338, "No": 0.07027583362031428}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959372354496365, "res": {"Yes": 0.9959372354496365, "No": 0.004061528087291786}, "ground_truth": 1}, {"key": "33646276", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7909410341086013, "res": {"Yes": 0.7909410341086013, "No": 0.2090427505182901}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.976420775536222, "res": {"Yes": 0.976420775536222, "No": 0.02357683539609846}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.570287093853844e-05, "res": {"No": 0.9999838332276837, "Yes": 1.570287093853844e-05}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955299353121634, "res": {"Yes": 0.9955299353121634, "No": 0.004469741508968562}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9866616636203965, "res": {"Yes": 0.9866616636203965, "No": 0.0133377355737304}, "ground_truth": 1}, {"key": "40322608", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8463993041563117, "res": {"Yes": 0.8463993041563117, "No": 0.15359899519333822}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.48756586011368797, "res": {"No": 0.5124325174026267, "Yes": 0.48756586011368797}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9952544926698675, "res": {"Yes": 0.9952544926698675, "No": 0.004744102553516626}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989183525028251, "res": {"Yes": 0.9989183525028251, "No": 0.0010815418684561224}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986481771368142, "res": {"Yes": 0.9986481771368142, "No": 0.0013513414678715694}, "ground_truth": 1}, {"key": "39565762", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950775234933159, "res": {"Yes": 0.9950775234933159, "No": 0.004922040985729973}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930367915668696, "res": {"Yes": 0.9930367915668696, "No": 0.006962534629672434}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 9.318565037949785e-07, "res": {"No": 0.9999980183344636, "Yes": 9.318565037949785e-07}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984109444878131, "res": {"Yes": 0.9984109444878131, "No": 0.0015886579115070036}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993476879129969, "res": {"Yes": 0.993476879129969, "No": 0.006521988270943642}, "ground_truth": 1}, {"key": "30534259", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985697148341941, "res": {"Yes": 0.9985697148341941, "No": 0.0014293192457243995}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9825256045181716, "res": {"Yes": 0.9825256045181716, "No": 0.01747242246792749}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8523776066871935, "res": {"Yes": 0.8523776066871935, "No": 0.14762179723133118}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996763876737683, "res": {"Yes": 0.9996763876737683, "No": 0.00032344094853180706}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915328132660555, "res": {"Yes": 0.9915328132660555, "No": 0.008466924003275407}, "ground_truth": 1}, {"key": "39644242", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999349252328169, "res": {"Yes": 0.999349252328169, "No": 0.0006504244553245836}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992611615756228, "res": {"Yes": 0.9992611615756228, "No": 0.0007384397822059457}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9941200860570791, "res": {"Yes": 0.9941200860570791, "No": 0.00587867137051046}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930349080757733, "res": {"Yes": 0.9930349080757733, "No": 0.0069647539430877205}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940119406215286, "res": {"Yes": 0.9940119406215286, "No": 0.005987289939702431}, "ground_truth": 1}, {"key": "19853740", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954035352267957, "res": {"Yes": 0.9954035352267957, "No": 0.004595813953069615}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998257952254141, "res": {"Yes": 0.9998257952254141, "No": 0.00017391376239998972}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7331080940204102, "res": {"Yes": 0.7331080940204102, "No": 0.26689013090684366}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.905217710612893, "res": {"Yes": 0.905217710612893, "No": 0.09478167406110256}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954070761820897, "res": {"Yes": 0.9954070761820897, "No": 0.004592870817438266}, "ground_truth": 1}, {"key": "33023078", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989282284578287, "res": {"Yes": 0.9989282284578287, "No": 0.0010716837169768133}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9822117770188876, "res": {"Yes": 0.9822117770188876, "No": 0.017787781321607812}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9544315226331472, "res": {"Yes": 0.9544315226331472, "No": 0.04556825350426674}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6591098014985328, "res": {"Yes": 0.6591098014985328, "No": 0.3408880345648651}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7002246267902578, "res": {"Yes": 0.7002246267902578, "No": 0.2997736767111265}, "ground_truth": 1}, {"key": "38329806", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867386173510142, "res": {"Yes": 0.9867386173510142, "No": 0.013260799525518955}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976202903092519, "res": {"Yes": 0.9976202903092519, "No": 0.0023793988672199157}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9786327426709448, "res": {"Yes": 0.9786327426709448, "No": 0.021366316335766803}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936501103029499, "res": {"Yes": 0.9936501103029499, "No": 0.006349768013830065}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991609482009844, "res": {"Yes": 0.9991609482009844, "No": 0.0008385955749103502}, "ground_truth": 1}, {"key": "38761942", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980036488650021, "res": {"Yes": 0.9980036488650021, "No": 0.0019959936149915713}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.975161954109031, "res": {"Yes": 0.975161954109031, "No": 0.02483728568072528}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.40228376351027284, "res": {"No": 0.5977136833935351, "Yes": 0.40228376351027284}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991129946220507, "res": {"Yes": 0.9991129946220507, "No": 0.0008869787793595599}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9734520554975438, "res": {"Yes": 0.9734520554975438, "No": 0.026547587545470697}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958551835609818, "res": {"Yes": 0.9958551835609818, "No": 0.004144692827069145}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9911441025223262, "res": {"Yes": 0.9911441025223262, "No": 0.008855720009373472}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9514583435243877, "res": {"Yes": 0.9514583435243877, "No": 0.04854049892987008}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998999011523539, "res": {"Yes": 0.998999011523539, "No": 0.0010007559645794346}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9878039925727988, "res": {"Yes": 0.9878039925727988, "No": 0.01219583230439827}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971952616486601, "res": {"Yes": 0.9971952616486601, "No": 0.00280457294323363}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987712400444995, "res": {"Yes": 0.9987712400444995, "No": 0.0012286954281273087}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9554764016407926, "res": {"Yes": 0.9554764016407926, "No": 0.04452317672996236}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994651027685586, "res": {"Yes": 0.9994651027685586, "No": 0.0005346397512649246}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995721639879392, "res": {"Yes": 0.9995721639879392, "No": 0.00042777038071638355}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995700192682222, "res": {"Yes": 0.9995700192682222, "No": 0.00042979594861936493}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998100668695813, "res": {"Yes": 0.9998100668695813, "No": 0.00018979101731801592}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.979983997041831, "res": {"Yes": 0.979983997041831, "No": 0.020015115817885155}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9870730066964798, "res": {"Yes": 0.9870730066964798, "No": 0.012925610831678706}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9150520500719399, "res": {"Yes": 0.9150520500719399, "No": 0.08494668963641487}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994098490476159, "res": {"Yes": 0.9994098490476159, "No": 0.0005900283331083478}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9484545675654286, "res": {"Yes": 0.9484545675654286, "No": 0.051542168527101806}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.994029256162146, "res": {"Yes": 0.994029256162146, "No": 0.0059705179293837}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985781587754037, "res": {"Yes": 0.9985781587754037, "No": 0.0014215065771154974}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999154754821126, "res": {"Yes": 0.999154754821126, "No": 0.0008441137479472156}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995111890070393, "res": {"Yes": 0.9995111890070393, "No": 0.0004882570250928442}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987866918542476, "res": {"Yes": 0.9987866918542476, "No": 0.001212880706127963}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.997382703309112, "res": {"Yes": 0.997382703309112, "No": 0.0026171056123021137}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937048400709144, "res": {"Yes": 0.9937048400709144, "No": 0.006294789822812051}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948427935882579, "res": {"Yes": 0.9948427935882579, "No": 0.0051566455401831955}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954005794799266, "res": {"Yes": 0.9954005794799266, "No": 0.004599204156092839}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9949041441519813, "res": {"Yes": 0.9949041441519813, "No": 0.005095308829065777}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8412565601125945, "res": {"Yes": 0.8412565601125945, "No": 0.1587327037342893}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7960811588347808, "res": {"Yes": 0.7960811588347808, "No": 0.2039154361000721}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8193640896554703, "res": {"Yes": 0.8193640896554703, "No": 0.18062870648388005}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9614419886412061, "res": {"Yes": 0.9614419886412061, "No": 0.03855624479515049}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9459808436279844, "res": {"Yes": 0.9459808436279844, "No": 0.05401540474742681}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.028130581072794286, "res": {"No": 0.9718674967793431, "Yes": 0.028130581072794286}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6680891074202726, "res": {"Yes": 0.6680891074202726, "No": 0.3319098443850283}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9223444547696532, "res": {"Yes": 0.9223444547696532, "No": 0.07765463812544962}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.983986957097448, "res": {"Yes": 0.983986957097448, "No": 0.016012599466104466}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9863327841924394, "res": {"Yes": 0.9863327841924394, "No": 0.013665525560569134}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9428363719647006, "res": {"Yes": 0.9428363719647006, "No": 0.05716146479638239}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.754262197032351, "res": {"Yes": 0.754262197032351, "No": 0.24573398252592263}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9597520491655931, "res": {"Yes": 0.9597520491655931, "No": 0.040246734855925975}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9734598509328171, "res": {"Yes": 0.9734598509328171, "No": 0.02653828690036294}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.2840260555883067, "res": {"No": 0.7159725505335698, "Yes": 0.2840260555883067}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9382125842433512, "res": {"Yes": 0.9382125842433512, "No": 0.06178653614408543}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231215435462959, "res": {"Yes": 0.9231215435462959, "No": 0.07687790999333854}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979685050847009, "res": {"Yes": 0.9979685050847009, "No": 0.0020311165754011502}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952419789161749, "res": {"Yes": 0.9952419789161749, "No": 0.004757871854993989}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9634378862437778, "res": {"Yes": 0.9634378862437778, "No": 0.036561127249782444}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.16185649009013062, "res": {"No": 0.8381423032241404, "Yes": 0.16185649009013062}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9886143551785551, "res": {"Yes": 0.9886143551785551, "No": 0.011384386131275568}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9437671560387737, "res": {"Yes": 0.9437671560387737, "No": 0.056231189652589955}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9768332979072646, "res": {"Yes": 0.9768332979072646, "No": 0.02316581913151202}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8484891005002219, "res": {"Yes": 0.8484891005002219, "No": 0.15150893179923625}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9978595275852812, "res": {"Yes": 0.9978595275852812, "No": 0.0021403442465748068}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999229229965849, "res": {"Yes": 0.9999229229965849, "No": 7.675262270720289e-05}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99997334352929, "res": {"Yes": 0.99997334352929, "No": 2.6572731488369746e-05}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999223270267016, "res": {"Yes": 0.9999223270267016, "No": 7.742928420675624e-05}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998187634553272, "res": {"Yes": 0.9998187634553272, "No": 0.00018112702088147952}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.022308877177951745, "res": {"No": 0.9776905901428019, "Yes": 0.022308877177951745}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9907493653372387, "res": {"Yes": 0.9907493653372387, "No": 0.00924995122911325}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932872473622433, "res": {"Yes": 0.9932872473622433, "No": 0.006712431661373144}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891315876466427, "res": {"Yes": 0.9891315876466427, "No": 0.010868119411915228}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9713158659011636, "res": {"Yes": 0.9713158659011636, "No": 0.028683675957586877}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.003235706442703139, "res": {"No": 0.9967636046545246, "Yes": 0.003235706442703139}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9931764749939301, "res": {"Yes": 0.9931764749939301, "No": 0.006823384972358798}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9669003108512789, "res": {"Yes": 0.9669003108512789, "No": 0.03309888774611873}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9045455826543088, "res": {"Yes": 0.9045455826543088, "No": 0.0954526753874446}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9765051165466709, "res": {"Yes": 0.9765051165466709, "No": 0.02348676518726662}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.017659948500094752, "res": {"No": 0.9823363421390381, "Yes": 0.017659948500094752}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9666437102257482, "res": {"Yes": 0.9666437102257482, "No": 0.03335294182053593}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8228629640551159, "res": {"Yes": 0.8228629640551159, "No": 0.17713505761507806}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9594543457104204, "res": {"Yes": 0.9594543457104204, "No": 0.04054423819035083}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.866797641717726, "res": {"Yes": 0.866797641717726, "No": 0.13320080198307732}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2287440010702252, "res": {"No": 0.7712552628448148, "Yes": 0.2287440010702252}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9563509717913325, "res": {"Yes": 0.9563509717913325, "No": 0.043648336822489836}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9103156185797421, "res": {"Yes": 0.9103156185797421, "No": 0.08968417023652879}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7808099270631506, "res": {"Yes": 0.7808099270631506, "No": 0.21918987843438323}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8990009120454172, "res": {"Yes": 0.8990009120454172, "No": 0.10099755200589583}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9889095550018643, "res": {"Yes": 0.9889095550018643, "No": 0.011089162488430093}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9629146792680875, "res": {"Yes": 0.9629146792680875, "No": 0.03708217807955633}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985171877540574, "res": {"Yes": 0.9985171877540574, "No": 0.001482407683288918}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885739201954381, "res": {"Yes": 0.9885739201954381, "No": 0.011424684014026106}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922024955004283, "res": {"Yes": 0.9922024955004283, "No": 0.007796318842157721}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9919593840831871, "res": {"Yes": 0.9919593840831871, "No": 0.008040081974579642}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.997840889339948, "res": {"Yes": 0.997840889339948, "No": 0.002159067205209356}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943278214959406, "res": {"Yes": 0.9943278214959406, "No": 0.0056705634062347516}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.988602237312364, "res": {"Yes": 0.988602237312364, "No": 0.011397106176183591}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9811739069007178, "res": {"Yes": 0.9811739069007178, "No": 0.018825291889463493}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1801768698967769, "res": {"No": 0.8198218896998116, "Yes": 0.1801768698967769}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.995795006846322, "res": {"Yes": 0.995795006846322, "No": 0.004204603426469736}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997465600513311, "res": {"Yes": 0.9997465600513311, "No": 0.00025337270777541946}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986661272001982, "res": {"Yes": 0.9986661272001982, "No": 0.0013337176640877974}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967991375181249, "res": {"Yes": 0.9967991375181249, "No": 0.003200576593305269}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7954174721141755, "res": {"Yes": 0.7954174721141755, "No": 0.2045803440942905}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936335149942163, "res": {"Yes": 0.9936335149942163, "No": 0.006365837988139756}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.961767828461351, "res": {"Yes": 0.961767828461351, "No": 0.038231518956772806}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968798064532994, "res": {"Yes": 0.9968798064532994, "No": 0.00311989603447375}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954761828258512, "res": {"Yes": 0.9954761828258512, "No": 0.004523374467824726}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0002953963630182999, "res": {"No": 0.999703669534548, "Yes": 0.0002953963630182999}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9664630807277272, "res": {"Yes": 0.9664630807277272, "No": 0.03353499696274982}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9771648698991791, "res": {"Yes": 0.9771648698991791, "No": 0.022833473128151805}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6658928548418109, "res": {"Yes": 0.6658928548418109, "No": 0.33408557708717385}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967572067485087, "res": {"Yes": 0.9967572067485087, "No": 0.003242331854061054}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.024355441115023704, "res": {"No": 0.9756435377461894, "Yes": 0.024355441115023704}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879696582293699, "res": {"Yes": 0.9879696582293699, "No": 0.012029446019172246}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.984908756594229, "res": {"Yes": 0.984908756594229, "No": 0.015090867793545426}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904143480800385, "res": {"Yes": 0.9904143480800385, "No": 0.009584778612870966}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975674893605849, "res": {"Yes": 0.9975674893605849, "No": 0.0024324361328852107}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9806963857190539, "res": {"Yes": 0.9806963857190539, "No": 0.0193030184292855}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9542985270953226, "res": {"Yes": 0.9542985270953226, "No": 0.04570082174283428}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981867632037494, "res": {"Yes": 0.9981867632037494, "No": 0.0018128798876696163}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979641107396573, "res": {"Yes": 0.9979641107396573, "No": 0.0020355524962651717}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.996032314446946, "res": {"Yes": 0.996032314446946, "No": 0.003966733575110916}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7416319745734152, "res": {"Yes": 0.7416319745734152, "No": 0.2583675229889496}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973740536448625, "res": {"Yes": 0.9973740536448625, "No": 0.0026253123487954143}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895661225183445, "res": {"Yes": 0.9895661225183445, "No": 0.010433687760773117}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9790206203572994, "res": {"Yes": 0.9790206203572994, "No": 0.020979036536896624}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942844522485917, "res": {"Yes": 0.9942844522485917, "No": 0.005715529129111066}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9898450769780205, "res": {"Yes": 0.9898450769780205, "No": 0.010154071712440949}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987635123815563, "res": {"Yes": 0.9987635123815563, "No": 0.0012362644333368184}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983285953551055, "res": {"Yes": 0.9983285953551055, "No": 0.0016709843417487627}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967613509745623, "res": {"Yes": 0.9967613509745623, "No": 0.0032384148912051706}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9883462195232123, "res": {"Yes": 0.9883462195232123, "No": 0.011652442083925676}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9902527662475437, "res": {"Yes": 0.9902527662475437, "No": 0.009745979870250568}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.996774503327361, "res": {"Yes": 0.996774503327361, "No": 0.0032251558639347683}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9684521302958896, "res": {"Yes": 0.9684521302958896, "No": 0.03154722340652536}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988507972398332, "res": {"Yes": 0.9988507972398332, "No": 0.0011491382101645317}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946409701101193, "res": {"Yes": 0.9946409701101193, "No": 0.005358683230670095}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9352395156760838, "res": {"Yes": 0.9352395156760838, "No": 0.06475863682753154}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908950711936942, "res": {"Yes": 0.9908950711936942, "No": 0.009104443390146485}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9528445072241084, "res": {"Yes": 0.9528445072241084, "No": 0.04715498259916168}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9791135308855049, "res": {"Yes": 0.9791135308855049, "No": 0.02088600001931496}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.982259612879458, "res": {"Yes": 0.982259612879458, "No": 0.017739313512549384}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7008417124087923, "res": {"Yes": 0.7008417124087923, "No": 0.29915438531569716}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8575909855269953, "res": {"Yes": 0.8575909855269953, "No": 0.14240228981101688}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7257904298117541, "res": {"Yes": 0.7257904298117541, "No": 0.27420513083834924}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5101649062196192, "res": {"Yes": 0.5101649062196192, "No": 0.48982041747604416}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.33742652367531323, "res": {"No": 0.6625494515887039, "Yes": 0.33742652367531323}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9416370819137209, "res": {"Yes": 0.9416370819137209, "No": 0.05836236685690995}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9708207084815706, "res": {"Yes": 0.9708207084815706, "No": 0.029178232792712387}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995412987623646, "res": {"Yes": 0.995412987623646, "No": 0.004586575554103597}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9859852282328414, "res": {"Yes": 0.9859852282328414, "No": 0.014014142229281351}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885607564327613, "res": {"Yes": 0.9885607564327613, "No": 0.01143879077618157}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3612196705626769, "res": {"No": 0.638778979125122, "Yes": 0.3612196705626769}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9900856365786032, "res": {"Yes": 0.9900856365786032, "No": 0.009913921475321531}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9675904462030519, "res": {"Yes": 0.9675904462030519, "No": 0.03240847228787462}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8656039306032389, "res": {"Yes": 0.8656039306032389, "No": 0.13439500725276962}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880473897999567, "res": {"Yes": 0.9880473897999567, "No": 0.011952127296549594}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07972675923811728, "res": {"No": 0.9202719544112259, "Yes": 0.07972675923811728}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8850833709038806, "res": {"Yes": 0.8850833709038806, "No": 0.11491593706040414}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9637219149075346, "res": {"Yes": 0.9637219149075346, "No": 0.03627719398603185}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937406280014541, "res": {"Yes": 0.9937406280014541, "No": 0.0062589802906877195}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945827067432528, "res": {"Yes": 0.9945827067432528, "No": 0.005417006079537045}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2689366040048313, "res": {"No": 0.7310594467759044, "Yes": 0.2689366040048313}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7805504992239083, "res": {"Yes": 0.7805504992239083, "No": 0.21944830208853885}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19663267198599682, "res": {"No": 0.8033641828619804, "Yes": 0.19663267198599682}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8781375980755795, "res": {"Yes": 0.8781375980755795, "No": 0.12186054034815359}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.006199469841640825, "res": {"No": 0.9937925483306076, "Yes": 0.006199469841640825}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9640979419493053, "res": {"Yes": 0.9640979419493053, "No": 0.035898653182479846}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9578620732686617, "res": {"Yes": 0.9578620732686617, "No": 0.04213435659325825}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9295957079393307, "res": {"Yes": 0.9295957079393307, "No": 0.0704009163195604}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9711867798017875, "res": {"Yes": 0.9711867798017875, "No": 0.028810237103982683}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9893804137232991, "res": {"Yes": 0.9893804137232991, "No": 0.010618258071721919}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8271761895792692, "res": {"Yes": 0.8271761895792692, "No": 0.17282030910075094}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9885534154078413, "res": {"Yes": 0.9885534154078413, "No": 0.011445862133705667}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944260092556158, "res": {"Yes": 0.9944260092556158, "No": 0.005573433431263715}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9673187549619013, "res": {"Yes": 0.9673187549619013, "No": 0.03268065390034607}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.998748886795521, "res": {"Yes": 0.998748886795521, "No": 0.0012508191778886602}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7321817525557117, "res": {"Yes": 0.7321817525557117, "No": 0.26781672348732205}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998413201997352, "res": {"Yes": 0.998413201997352, "No": 0.0015864332842247457}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889169031216128, "res": {"Yes": 0.9889169031216128, "No": 0.01108264397751692}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701628819941747, "res": {"Yes": 0.9701628819941747, "No": 0.029836732807689947}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976672740438718, "res": {"Yes": 0.9976672740438718, "No": 0.0023324727687877313}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9357476813992756, "res": {"Yes": 0.9357476813992756, "No": 0.0642491490276729}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5738491626705164, "res": {"Yes": 0.5738491626705164, "No": 0.4261476485166816}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9858852267946708, "res": {"Yes": 0.9858852267946708, "No": 0.014114005171419922}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9924057982771797, "res": {"Yes": 0.9924057982771797, "No": 0.007593894574539972}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9674616675175478, "res": {"Yes": 0.9674616675175478, "No": 0.0325378158971055}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.831987077610247, "res": {"Yes": 0.831987077610247, "No": 0.16800823213093538}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932385606938126, "res": {"Yes": 0.9932385606938126, "No": 0.006760771285145019}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920610939008916, "res": {"Yes": 0.9920610939008916, "No": 0.007938047029782904}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9830158528066034, "res": {"Yes": 0.9830158528066034, "No": 0.016982769631574617}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9791481568686642, "res": {"Yes": 0.9791481568686642, "No": 0.02085067410677038}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9791106669826155, "res": {"Yes": 0.9791106669826155, "No": 0.020888092901368536}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9885007704089939, "res": {"Yes": 0.9885007704089939, "No": 0.011498030720179417}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6139780504050288, "res": {"Yes": 0.6139780504050288, "No": 0.38599964083384725}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9858542862582346, "res": {"Yes": 0.9858542862582346, "No": 0.01414309584664057}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961212456759259, "res": {"Yes": 0.9961212456759259, "No": 0.003876697684464691}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.482118825201304, "res": {"No": 0.5178737304747909, "Yes": 0.482118825201304}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9730364130619715, "res": {"Yes": 0.9730364130619715, "No": 0.026962469458700074}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888601399316043, "res": {"Yes": 0.9888601399316043, "No": 0.011136761454375933}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9703446842665107, "res": {"Yes": 0.9703446842665107, "No": 0.029654057985082118}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880995294483355, "res": {"Yes": 0.9880995294483355, "No": 0.011898118507245179}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03986246996943594, "res": {"No": 0.9601368553734102, "Yes": 0.03986246996943594}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943241724189643, "res": {"Yes": 0.9943241724189643, "No": 0.005675692480859136}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941748483720526, "res": {"Yes": 0.9941748483720526, "No": 0.005825036578379267}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957266916775244, "res": {"Yes": 0.9957266916775244, "No": 0.004273212041435375}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9863935629048111, "res": {"Yes": 0.9863935629048111, "No": 0.013606231400016116}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4623223518151609, "res": {"No": 0.5376757745617702, "Yes": 0.4623223518151609}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.996625755999626, "res": {"Yes": 0.996625755999626, "No": 0.0033736211037285074}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9758152563623338, "res": {"Yes": 0.9758152563623338, "No": 0.024183441876116794}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969511302719953, "res": {"Yes": 0.9969511302719953, "No": 0.0030482601309485633}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.863346683122466, "res": {"Yes": 0.863346683122466, "No": 0.13665080167434293}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.868937472239511, "res": {"Yes": 0.868937472239511, "No": 0.13105987488635498}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9061246134119049, "res": {"Yes": 0.9061246134119049, "No": 0.09387435873247486}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9136717387618752, "res": {"Yes": 0.9136717387618752, "No": 0.08632664327296784}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9813605517773132, "res": {"Yes": 0.9813605517773132, "No": 0.018638994766774754}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9675617614418801, "res": {"Yes": 0.9675617614418801, "No": 0.032435590073173114}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6893820240599904, "res": {"Yes": 0.6893820240599904, "No": 0.31061505460873357}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4483666348038094, "res": {"No": 0.551630853540404, "Yes": 0.4483666348038094}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3721252082583992, "res": {"No": 0.6278694099984612, "Yes": 0.3721252082583992}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8399029328104021, "res": {"Yes": 0.8399029328104021, "No": 0.16009599098280797}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9474112215386374, "res": {"Yes": 0.9474112215386374, "No": 0.05258611328935317}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9422830140360275, "res": {"Yes": 0.9422830140360275, "No": 0.057716706883758816}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957920536214495, "res": {"Yes": 0.9957920536214495, "No": 0.004207672224491073}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997791879093706, "res": {"Yes": 0.997791879093706, "No": 0.002207951756979616}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6230736991501455, "res": {"Yes": 0.6230736991501455, "No": 0.37692564089258845}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983332292078679, "res": {"Yes": 0.9983332292078679, "No": 0.0016662056557979774}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.251134958772048, "res": {"No": 0.7488615417723179, "Yes": 0.251134958772048}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959859588869219, "res": {"Yes": 0.9959859588869219, "No": 0.004013763295811931}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961415891697974, "res": {"Yes": 0.9961415891697974, "No": 0.003858144154928719}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996393353551356, "res": {"Yes": 0.9996393353551356, "No": 0.0003605273273824685}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917631770608879, "res": {"Yes": 0.9917631770608879, "No": 0.00823637466265107}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.11675041824981715, "res": {"No": 0.883247758970494, "Yes": 0.11675041824981715}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983624572386632, "res": {"Yes": 0.9983624572386632, "No": 0.0016372642013488805}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994778435111729, "res": {"Yes": 0.9994778435111729, "No": 0.0005219806707404964}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999966549126493, "res": {"Yes": 0.999966549126493, "No": 3.328369235494982e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991534484271382, "res": {"Yes": 0.9991534484271382, "No": 0.0008462954733973212}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.06342427527817453, "res": {"No": 0.9365749202789337, "Yes": 0.06342427527817453}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976230218971085, "res": {"Yes": 0.9976230218971085, "No": 0.002376666032629224}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905210046107273, "res": {"Yes": 0.9905210046107273, "No": 0.009478539839002667}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954060123911158, "res": {"Yes": 0.9954060123911158, "No": 0.004593619516398819}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996365946877124, "res": {"Yes": 0.9996365946877124, "No": 0.0003633083991342832}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6814972248323926, "res": {"Yes": 0.6814972248323926, "No": 0.31849996941442066}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9731459091716694, "res": {"Yes": 0.9731459091716694, "No": 0.026852692062980233}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9808459023181277, "res": {"Yes": 0.9808459023181277, "No": 0.01915148539586288}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9413165137745361, "res": {"Yes": 0.9413165137745361, "No": 0.0586822196544874}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8675791778753891, "res": {"Yes": 0.8675791778753891, "No": 0.13242021047982885}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9950221603131965, "res": {"Yes": 0.9950221603131965, "No": 0.004977542418564775}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.0001093860707634551, "res": {"No": 0.9998903834333349, "Yes": 0.0001093860707634551}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996236946896934, "res": {"Yes": 0.996236946896934, "No": 0.0037621055184443994}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943301810386606, "res": {"Yes": 0.9943301810386606, "No": 0.0056688897029679875}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874495812226837, "res": {"Yes": 0.9874495812226837, "No": 0.012549312447059029}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5369402644748639, "res": {"Yes": 0.5369402644748639, "No": 0.4630409888410428}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8198452139643597, "res": {"Yes": 0.8198452139643597, "No": 0.18015380873337603}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988010834753748, "res": {"Yes": 0.9988010834753748, "No": 0.001198483825260814}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994715982494292, "res": {"Yes": 0.994715982494292, "No": 0.005281500690297678}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971092111998886, "res": {"Yes": 0.9971092111998886, "No": 0.0028904271066764946}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8716658657371844, "res": {"Yes": 0.8716658657371844, "No": 0.1283328242825256}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9652937836111574, "res": {"Yes": 0.9652937836111574, "No": 0.034699395948675195}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9775867932653625, "res": {"Yes": 0.9775867932653625, "No": 0.022409282693584175}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912933111695953, "res": {"Yes": 0.9912933111695953, "No": 0.008705545870772515}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.726818201087915, "res": {"Yes": 0.726818201087915, "No": 0.2731725041549331}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8278275907585535, "res": {"Yes": 0.8278275907585535, "No": 0.1721717248410589}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9738031670301678, "res": {"Yes": 0.9738031670301678, "No": 0.026196150670036642}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6402094631519083, "res": {"Yes": 0.6402094631519083, "No": 0.3597900183746188}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9639864890288967, "res": {"Yes": 0.9639864890288967, "No": 0.036013277106690195}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8415979692205491, "res": {"Yes": 0.8415979692205491, "No": 0.15840145722362986}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5158976427197833, "res": {"Yes": 0.5158976427197833, "No": 0.48409960402402596}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9792549152952313, "res": {"Yes": 0.9792549152952313, "No": 0.02074473083946427}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948793779134184, "res": {"Yes": 0.9948793779134184, "No": 0.005120570604238746}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5104102831076609, "res": {"Yes": 0.5104102831076609, "No": 0.48958886064177953}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9593991508490338, "res": {"Yes": 0.9593991508490338, "No": 0.04060041462451982}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9788068872658825, "res": {"Yes": 0.9788068872658825, "No": 0.021192167989911942}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999748931371826, "res": {"Yes": 0.9999748931371826, "No": 2.4713102237804643e-05}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999742971333243, "res": {"Yes": 0.9999742971333243, "No": 2.5577180262615537e-05}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999591587700257, "res": {"Yes": 0.9999591587700257, "No": 4.074721761479822e-05}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996594700634849, "res": {"Yes": 0.9996594700634849, "No": 0.000340133387878727}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9716517058996372, "res": {"Yes": 0.9716517058996372, "No": 0.028347730639551177}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919736941924804, "res": {"Yes": 0.9919736941924804, "No": 0.00802588769407061}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961839503938784, "res": {"Yes": 0.9961839503938784, "No": 0.003815743670321024}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8342000727583367, "res": {"Yes": 0.8342000727583367, "No": 0.1657991770174228}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.715260247497292, "res": {"Yes": 0.715260247497292, "No": 0.28473888831927613}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9989213293838431, "res": {"Yes": 0.9989213293838431, "No": 0.001078274716400243}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999520068687072, "res": {"Yes": 0.9999520068687072, "No": 4.763303618307312e-05}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999856212553752, "res": {"Yes": 0.9999856212553752, "No": 1.4061570190110655e-05}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999695291212467, "res": {"Yes": 0.9999695291212467, "No": 3.0138142371293223e-05}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998938399653753, "res": {"Yes": 0.9998938399653753, "No": 0.00010584159165056526}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9225993891083777, "res": {"Yes": 0.9225993891083777, "No": 0.0774001314552233}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998396208749905, "res": {"Yes": 0.998396208749905, "No": 0.0016037045215575555}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990989520876848, "res": {"Yes": 0.9990989520876848, "No": 0.0009006318368598957}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9833366504333697, "res": {"Yes": 0.9833366504333697, "No": 0.016663159739376283}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999620161656856, "res": {"Yes": 0.999620161656856, "No": 0.0003796730767034257}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7009511854249262, "res": {"Yes": 0.7009511854249262, "No": 0.29904709788083617}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9659131168148747, "res": {"Yes": 0.9659131168148747, "No": 0.034086338686283156}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9917432428213688, "res": {"Yes": 0.9917432428213688, "No": 0.008255868484973134}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9917750118414919, "res": {"Yes": 0.9917750118414919, "No": 0.008224605064152044}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840873790744011, "res": {"Yes": 0.9840873790744011, "No": 0.01591223450372157}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.49781632265601644, "res": {"No": 0.5021266752437832, "Yes": 0.49781632265601644}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8748122561909034, "res": {"Yes": 0.8748122561909034, "No": 0.1251758162395614}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7846956840431516, "res": {"Yes": 0.7846956840431516, "No": 0.21530104950648277}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8583673450813312, "res": {"Yes": 0.8583673450813312, "No": 0.14162825169548926}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7603908664097778, "res": {"Yes": 0.7603908664097778, "No": 0.23957173192037537}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0010215512230805357, "res": {"No": 0.9989774793308979, "Yes": 0.0010215512230805357}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.00029720512529405604, "res": {"No": 0.9997009286607517, "Yes": 0.00029720512529405604}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.005192435624895285, "res": {"No": 0.9948056321287557, "Yes": 0.005192435624895285}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.005890838096588132, "res": {"No": 0.994108886065045, "Yes": 0.005890838096588132}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.003231575143787633, "res": {"No": 0.9967682204781125, "Yes": 0.003231575143787633}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4121143828036413, "res": {"No": 0.5878838409521904, "Yes": 0.4121143828036413}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9856749737010064, "res": {"Yes": 0.9856749737010064, "No": 0.014324957259726361}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987731449028249, "res": {"Yes": 0.9987731449028249, "No": 0.0012266468138031274}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985575917721613, "res": {"Yes": 0.9985575917721613, "No": 0.0014419360344709075}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9844839674525071, "res": {"Yes": 0.9844839674525071, "No": 0.015515289839691275}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8282220534961066, "res": {"Yes": 0.8282220534961066, "No": 0.17176034788505068}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9787662294510407, "res": {"Yes": 0.9787662294510407, "No": 0.02122947994072772}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9768625339885388, "res": {"Yes": 0.9768625339885388, "No": 0.023134274882341016}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7188791399758218, "res": {"Yes": 0.7188791399758218, "No": 0.2811071965527982}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9392807595586415, "res": {"Yes": 0.9392807595586415, "No": 0.06070651837161084}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.008343217046487158, "res": {"No": 0.9916560079638773, "Yes": 0.008343217046487158}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9847549898700143, "res": {"Yes": 0.9847549898700143, "No": 0.015244666115927837}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834292241362514, "res": {"Yes": 0.9834292241362514, "No": 0.01657046475431612}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962365906426655, "res": {"Yes": 0.9962365906426655, "No": 0.0037633396914477555}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824175601534278, "res": {"Yes": 0.9824175601534278, "No": 0.017581817632585286}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7745741147734104, "res": {"Yes": 0.7745741147734104, "No": 0.22542510585874073}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933248896890117, "res": {"Yes": 0.9933248896890117, "No": 0.006674484540998118}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9899394641958417, "res": {"Yes": 0.9899394641958417, "No": 0.010060036360029782}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9714420723135474, "res": {"Yes": 0.9714420723135474, "No": 0.028556382862622538}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9057639932272872, "res": {"Yes": 0.9057639932272872, "No": 0.09423480366285855}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9632800157960327, "res": {"Yes": 0.9632800157960327, "No": 0.03671710706376404}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975512493933196, "res": {"Yes": 0.9975512493933196, "No": 0.002447344793183527}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889066451397799, "res": {"Yes": 0.9889066451397799, "No": 0.011091244119584148}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975409298787771, "res": {"Yes": 0.9975409298787771, "No": 0.002457611341877011}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965376751179171, "res": {"Yes": 0.9965376751179171, "No": 0.0034616930111654952}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2078650516459365, "res": {"No": 0.7921335798204399, "Yes": 0.2078650516459365}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960229674247552, "res": {"Yes": 0.9960229674247552, "No": 0.003974950435540484}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9475927411251599, "res": {"Yes": 0.9475927411251599, "No": 0.052404532149114016}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812317576384888, "res": {"Yes": 0.9812317576384888, "No": 0.018765956033458063}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986894345419827, "res": {"Yes": 0.9986894345419827, "No": 0.0013072337704560868}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8090483164789912, "res": {"Yes": 0.8090483164789912, "No": 0.19093968235481923}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873377756744114, "res": {"Yes": 0.9873377756744114, "No": 0.012660981976370578}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8868216747564928, "res": {"Yes": 0.8868216747564928, "No": 0.11317547371307486}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9368376701398423, "res": {"Yes": 0.9368376701398423, "No": 0.0631561782784291}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.770803871307958, "res": {"Yes": 0.770803871307958, "No": 0.22919165592913132}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8734606841327324, "res": {"Yes": 0.8734606841327324, "No": 0.1265381797685223}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.2668470583157669, "res": {"No": 0.7331491638861934, "Yes": 0.2668470583157669}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7717756345649298, "res": {"Yes": 0.7717756345649298, "No": 0.2282026109775965}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36092572129715517, "res": {"No": 0.6390581993225407, "Yes": 0.36092572129715517}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5208460866351436, "res": {"Yes": 0.5208460866351436, "No": 0.4791326733831549}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 6.935335739822535e-05, "res": {"No": 0.9999298363015874, "Yes": 6.935335739822535e-05}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920173271150095, "res": {"Yes": 0.9920173271150095, "No": 0.007982393301631724}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.975314113672484, "res": {"Yes": 0.975314113672484, "No": 0.024682092457739335}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8895988572969186, "res": {"Yes": 0.8895988572969186, "No": 0.11039769764736898}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860282309443321, "res": {"Yes": 0.9860282309443321, "No": 0.013968050003921654}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9804703445844702, "res": {"Yes": 0.9804703445844702, "No": 0.019526128250677646}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.503258336134662, "res": {"Yes": 0.503258336134662, "No": 0.49673912007721616}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9582951650364457, "res": {"Yes": 0.9582951650364457, "No": 0.041703801135631255}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9496473764282579, "res": {"Yes": 0.9496473764282579, "No": 0.05035212279156238}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974452002698967, "res": {"Yes": 0.9974452002698967, "No": 0.0025547487417142893}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8449186402401563, "res": {"Yes": 0.8449186402401563, "No": 0.15508030491540248}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9695551024191368, "res": {"Yes": 0.9695551024191368, "No": 0.030444449008989058}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986457999575226, "res": {"Yes": 0.9986457999575226, "No": 0.0013537270671941791}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991217968463071, "res": {"Yes": 0.9991217968463071, "No": 0.0008780919000902467}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.984636858854198, "res": {"Yes": 0.984636858854198, "No": 0.015363030078227254}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6741413310948566, "res": {"Yes": 0.6741413310948566, "No": 0.32585668853213606}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4257871097485077, "res": {"No": 0.5741866564907322, "Yes": 0.4257871097485077}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7375796327404464, "res": {"Yes": 0.7375796327404464, "No": 0.2624148842518897}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9746729238328878, "res": {"Yes": 0.9746729238328878, "No": 0.025326145446463572}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.598695749874218, "res": {"Yes": 0.598695749874218, "No": 0.4012987807357807}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.759040517705599, "res": {"Yes": 0.759040517705599, "No": 0.24095759869390704}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.36746507458067423, "res": {"No": 0.6325318271763813, "Yes": 0.36746507458067423}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992439046142795, "res": {"Yes": 0.9992439046142795, "No": 0.0007560394504585531}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977357391147905, "res": {"Yes": 0.9977357391147905, "No": 0.002263937783058585}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.966045247311108, "res": {"Yes": 0.966045247311108, "No": 0.033954071697128024}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0035503325635828707, "res": {"No": 0.9964493714423056, "Yes": 0.0035503325635828707}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962930302396796, "res": {"Yes": 0.9962930302396796, "No": 0.0037065179538383706}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978907494019744, "res": {"Yes": 0.9978907494019744, "No": 0.002108329186323042}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968056582006907, "res": {"Yes": 0.9968056582006907, "No": 0.0031938273024996666}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986289144977684, "res": {"Yes": 0.9986289144977684, "No": 0.0013707644528257127}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6667467435476213, "res": {"Yes": 0.6667467435476213, "No": 0.3332518061943408}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9872075213386285, "res": {"Yes": 0.9872075213386285, "No": 0.012791932246718015}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9890711723452699, "res": {"Yes": 0.9890711723452699, "No": 0.010927644585187425}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9381653651922637, "res": {"Yes": 0.9381653651922637, "No": 0.06183388894560007}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9037594401782093, "res": {"Yes": 0.9037594401782093, "No": 0.0962354775303266}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9667870934538862, "res": {"Yes": 0.9667870934538862, "No": 0.03321212695584775}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955987046144326, "res": {"Yes": 0.9955987046144326, "No": 0.004397676261280258}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9751400795214227, "res": {"Yes": 0.9751400795214227, "No": 0.02485871956645935}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9803730622410289, "res": {"Yes": 0.9803730622410289, "No": 0.0196254477556385}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910757119743135, "res": {"Yes": 0.9910757119743135, "No": 0.008922958651485973}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9695872611560914, "res": {"Yes": 0.9695872611560914, "No": 0.030410753486810632}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9580953123398466, "res": {"Yes": 0.9580953123398466, "No": 0.04189839325187243}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9336521971770473, "res": {"Yes": 0.9336521971770473, "No": 0.06634614638474366}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969301578310553, "res": {"Yes": 0.9969301578310553, "No": 0.0030695141108208267}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9648161537905228, "res": {"Yes": 0.9648161537905228, "No": 0.03518259777559473}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8894524733956569, "res": {"Yes": 0.8894524733956569, "No": 0.1105409210869013}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966772680326783, "res": {"Yes": 0.9966772680326783, "No": 0.0033213621302495447}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905087162833688, "res": {"Yes": 0.9905087162833688, "No": 0.009490436230785144}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9868285784644387, "res": {"Yes": 0.9868285784644387, "No": 0.013170299795111034}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9918001040667068, "res": {"Yes": 0.9918001040667068, "No": 0.008198344789716467}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.10048940298277188, "res": {"No": 0.8995042956548557, "Yes": 0.10048940298277188}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999591587700257, "res": {"Yes": 0.9999591587700257, "No": 4.0717564435441626e-05}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999280483736425, "res": {"Yes": 0.9999280483736425, "No": 7.17543555957343e-05}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996950894546717, "res": {"Yes": 0.9996950894546717, "No": 0.0003046566966674359}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998017242292393, "res": {"Yes": 0.9998017242292393, "No": 0.00019803079679421706}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9813310446718544, "res": {"Yes": 0.9813310446718544, "No": 0.01866767986324483}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.994403736450736, "res": {"Yes": 0.994403736450736, "No": 0.005596061120846921}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986326757717539, "res": {"Yes": 0.986326757717539, "No": 0.013672844041936907}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969155882020355, "res": {"Yes": 0.9969155882020355, "No": 0.0030843303858944654}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981751169276313, "res": {"Yes": 0.9981751169276313, "No": 0.0018247505248575387}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9097017776957407, "res": {"Yes": 0.9097017776957407, "No": 0.09029768099567331}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917397251143252, "res": {"Yes": 0.9917397251143252, "No": 0.008260092441888085}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988844569812416, "res": {"Yes": 0.9988844569812416, "No": 0.0011154057934124847}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976747480962511, "res": {"Yes": 0.9976747480962511, "No": 0.002325197028715034}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8643960707485572, "res": {"Yes": 0.8643960707485572, "No": 0.13560311930720145}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09988662361904346, "res": {"No": 0.9001112069620268, "Yes": 0.09988662361904346}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887064039087958, "res": {"Yes": 0.9887064039087958, "No": 0.011292234521363704}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884596516091821, "res": {"Yes": 0.9884596516091821, "No": 0.011539819030010194}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943485657770708, "res": {"Yes": 0.9943485657770708, "No": 0.0056506200301229175}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840390054800682, "res": {"Yes": 0.9840390054800682, "No": 0.01596020727323509}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3265304084476866, "res": {"No": 0.6734638563340797, "Yes": 0.3265304084476866}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9854197829631186, "res": {"Yes": 0.9854197829631186, "No": 0.014579943902123613}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974946656018369, "res": {"Yes": 0.9974946656018369, "No": 0.0025051155945024998}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964342224380228, "res": {"Yes": 0.9964342224380228, "No": 0.0035650293226586957}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9234157246286864, "res": {"Yes": 0.9234157246286864, "No": 0.07657776641858194}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9344072387390371, "res": {"Yes": 0.9344072387390371, "No": 0.06559166821605553}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929145457366709, "res": {"Yes": 0.9929145457366709, "No": 0.0070850241304139}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987156869020205, "res": {"Yes": 0.987156869020205, "No": 0.012842714415326362}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992464619812759, "res": {"Yes": 0.992464619812759, "No": 0.00753458141904606}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888126886386011, "res": {"Yes": 0.9888126886386011, "No": 0.011186294000834075}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0460738193628912, "res": {"No": 0.9539160035533109, "Yes": 0.0460738193628912}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9689582012370795, "res": {"Yes": 0.9689582012370795, "No": 0.03103918004785749}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.916287200304751, "res": {"Yes": 0.916287200304751, "No": 0.08370172436219955}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273640890080725, "res": {"Yes": 0.9273640890080725, "No": 0.07262597055319087}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9536877187093536, "res": {"Yes": 0.9536877187093536, "No": 0.04630998418465036}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03607302943249574, "res": {"No": 0.963926000774549, "Yes": 0.03607302943249574}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934120546929482, "res": {"Yes": 0.9934120546929482, "No": 0.006587370656462627}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921483901891925, "res": {"Yes": 0.9921483901891925, "No": 0.007850567945968783}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.944106614617817, "res": {"Yes": 0.944106614617817, "No": 0.05589207387607872}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930224439770434, "res": {"Yes": 0.9930224439770434, "No": 0.00697745738542425}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02634870108227807, "res": {"No": 0.9736364486725144, "Yes": 0.02634870108227807}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.962265402684165, "res": {"Yes": 0.962265402684165, "No": 0.03773289570018712}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.979045530874874, "res": {"Yes": 0.979045530874874, "No": 0.02095326075699659}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9275445567794691, "res": {"Yes": 0.9275445567794691, "No": 0.07245301472283022}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8156557157688245, "res": {"Yes": 0.8156557157688245, "No": 0.18434232277543344}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03233142213890199, "res": {"No": 0.9676678991125067, "Yes": 0.03233142213890199}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.981474571888101, "res": {"Yes": 0.981474571888101, "No": 0.01852462038588433}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99766537478649, "res": {"Yes": 0.99766537478649, "No": 0.0023339550595375454}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992262869404287, "res": {"Yes": 0.9992262869404287, "No": 0.000773193358727714}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981783258661553, "res": {"Yes": 0.9981783258661553, "No": 0.0018210986612415905}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8906696664314496, "res": {"Yes": 0.8906696664314496, "No": 0.10932956787793684}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9535737789076216, "res": {"Yes": 0.9535737789076216, "No": 0.04642472013902938}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9171867410511572, "res": {"Yes": 0.9171867410511572, "No": 0.08281190067167588}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37308992597150603, "res": {"No": 0.6269090090579512, "Yes": 0.37308992597150603}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6787250820068478, "res": {"Yes": 0.6787250820068478, "No": 0.3212730160651898}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03608142460318491, "res": {"No": 0.9639149301480033, "Yes": 0.03608142460318491}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7722897156636398, "res": {"Yes": 0.7722897156636398, "No": 0.22770307531703074}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8502780715477709, "res": {"Yes": 0.8502780715477709, "No": 0.149689040498407}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9796567889640897, "res": {"Yes": 0.9796567889640897, "No": 0.020342448846680808}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6915843735941866, "res": {"Yes": 0.6915843735941866, "No": 0.30839791438820185}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.20386872442468829, "res": {"No": 0.7961113069991178, "Yes": 0.20386872442468829}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9345323548999463, "res": {"Yes": 0.9345323548999463, "No": 0.0654632148135888}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9868220772591788, "res": {"Yes": 0.9868220772591788, "No": 0.013167498770244174}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932036342725674, "res": {"Yes": 0.9932036342725674, "No": 0.006786449333715604}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9915543833054917, "res": {"Yes": 0.9915543833054917, "No": 0.008443241332206796}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9563093312638813, "res": {"Yes": 0.9563093312638813, "No": 0.0436887433197347}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8224770002133773, "res": {"Yes": 0.8224770002133773, "No": 0.17752062138460395}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979214962866854, "res": {"Yes": 0.9979214962866854, "No": 0.0020776953590748116}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909376746522252, "res": {"Yes": 0.9909376746522252, "No": 0.009060898424423541}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974831573727674, "res": {"Yes": 0.9974831573727674, "No": 0.002515927892835456}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9606210755564027, "res": {"Yes": 0.9606210755564027, "No": 0.03937569622846117}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9486786436593639, "res": {"Yes": 0.9486786436593639, "No": 0.05131692290577826}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33668907521008723, "res": {"No": 0.6633067938522935, "Yes": 0.33668907521008723}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.19603322868769738, "res": {"No": 0.803951648735691, "Yes": 0.19603322868769738}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9920369216347694, "res": {"Yes": 0.9920369216347694, "No": 0.007962569736548075}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99735234433932, "res": {"Yes": 0.99735234433932, "No": 0.0026473900793758083}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973929063868779, "res": {"Yes": 0.9973929063868779, "No": 0.002606778789873857}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9865977369222493, "res": {"Yes": 0.9865977369222493, "No": 0.013401995033637303}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978899166624915, "res": {"Yes": 0.9978899166624915, "No": 0.0021100407898469445}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.13623373891147111, "res": {"No": 0.8637641344344943, "Yes": 0.13623373891147111}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939712989879779, "res": {"Yes": 0.9939712989879779, "No": 0.006028321893051612}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986569790609069, "res": {"Yes": 0.9986569790609069, "No": 0.0013427823998455953}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99956144032542, "res": {"Yes": 0.99956144032542, "No": 0.0004383225951838595}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997127227020275, "res": {"Yes": 0.9997127227020275, "No": 0.00028692634866473425}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9983983472170347, "res": {"Yes": 0.9983983472170347, "No": 0.0016015371374780583}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965309294775715, "res": {"Yes": 0.9965309294775715, "No": 0.003468914008005837}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977147356017296, "res": {"Yes": 0.9977147356017296, "No": 0.0022851308305884828}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985861230671268, "res": {"Yes": 0.9985861230671268, "No": 0.001413757818813984}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938843968945041, "res": {"Yes": 0.9938843968945041, "No": 0.00611510707362737}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9782835006388322, "res": {"Yes": 0.9782835006388322, "No": 0.021714333641918385}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935838452911933, "res": {"Yes": 0.9935838452911933, "No": 0.00641446553033283}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983659083839703, "res": {"Yes": 0.9983659083839703, "No": 0.0016324551016657723}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969955569993377, "res": {"Yes": 0.9969955569993377, "No": 0.0030036321679429486}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920188533348413, "res": {"Yes": 0.9920188533348413, "No": 0.007979544095836543}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9891827884009977, "res": {"Yes": 0.9891827884009977, "No": 0.010816027685550851}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998081599736516, "res": {"Yes": 0.9998081599736516, "No": 0.00019155300483348261}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996922331199457, "res": {"Yes": 0.9996922331199457, "No": 0.0003076327058729837}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995976520236648, "res": {"Yes": 0.9995976520236648, "No": 0.00040215458639175205}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970286242014351, "res": {"Yes": 0.9970286242014351, "No": 0.0029707762401390727}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9840518226715863, "res": {"Yes": 0.9840518226715863, "No": 0.015947275171308426}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994892738650156, "res": {"Yes": 0.9994892738650156, "No": 0.0005106424087029722}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993482171792793, "res": {"Yes": 0.993482171792793, "No": 0.006517769909319481}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922804095736383, "res": {"Yes": 0.9922804095736383, "No": 0.0077192760133289835}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995852635469492, "res": {"Yes": 0.9995852635469492, "No": 0.00041458605224590844}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9219372558173894, "res": {"Yes": 0.9219372558173894, "No": 0.07806189822602759}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873913471134058, "res": {"Yes": 0.9873913471134058, "No": 0.01260730337513215}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984262681167405, "res": {"Yes": 0.9984262681167405, "No": 0.0015732612260554665}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9803679054922838, "res": {"Yes": 0.9803679054922838, "No": 0.019630211429857273}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.996037279381598, "res": {"Yes": 0.996037279381598, "No": 0.00396162612649766}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.440064003565338, "res": {"No": 0.5599336412736373, "Yes": 0.440064003565338}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967554319242065, "res": {"Yes": 0.9967554319242065, "No": 0.003243997219013973}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993761569110668, "res": {"Yes": 0.9993761569110668, "No": 0.0006226384155781904}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978267642038632, "res": {"Yes": 0.9978267642038632, "No": 0.0021715002301559896}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.00591098201368134, "res": {"No": 0.9940819102923216, "Yes": 0.00591098201368134}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8120181880459074, "res": {"Yes": 0.8120181880459074, "No": 0.18798052724516015}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9661743234426814, "res": {"Yes": 0.9661743234426814, "No": 0.03382454691463234}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9625820882147478, "res": {"Yes": 0.9625820882147478, "No": 0.03741730054882925}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98617230216522, "res": {"Yes": 0.98617230216522, "No": 0.013827420023291003}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.3837437142574603, "res": {"No": 0.6162551664307091, "Yes": 0.3837437142574603}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9995084487109395, "res": {"Yes": 0.9995084487109395, "No": 0.0004906417646618347}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999459277919546, "res": {"Yes": 0.9999459277919546, "No": 5.334608319833798e-05}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997152252560272, "res": {"Yes": 0.9997152252560272, "No": 0.0002841904346429971}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996438634816706, "res": {"Yes": 0.9996438634816706, "No": 0.0003554124186903865}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999528412540086, "res": {"Yes": 0.9999528412540086, "No": 4.677709022518909e-05}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7979037682256169, "res": {"Yes": 0.7979037682256169, "No": 0.2020867334945622}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930439645954326, "res": {"Yes": 0.9930439645954326, "No": 0.006955708427912169}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992587829371848, "res": {"Yes": 0.9992587829371848, "No": 0.0007409352636350899}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944851930873553, "res": {"Yes": 0.9944851930873553, "No": 0.005514211556280552}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9902772010360138, "res": {"Yes": 0.9902772010360138, "No": 0.00972190200622284}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3835512229805936, "res": {"No": 0.6164396577001852, "Yes": 0.3835512229805936}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950528536381492, "res": {"Yes": 0.9950528536381492, "No": 0.004946895850602134}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987701684634108, "res": {"Yes": 0.9987701684634108, "No": 0.0012290687625049947}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976145930158044, "res": {"Yes": 0.9976145930158044, "No": 0.002385083129769491}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993066280934744, "res": {"Yes": 0.9993066280934744, "No": 0.00069283491849603}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9275621116538719, "res": {"Yes": 0.9275621116538719, "No": 0.07242564714392913}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944625564752289, "res": {"Yes": 0.9944625564752289, "No": 0.005531255250309432}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960202474889495, "res": {"Yes": 0.9960202474889495, "No": 0.003978625782849396}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912034777970342, "res": {"Yes": 0.9912034777970342, "No": 0.008793671182865188}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990730077230515, "res": {"Yes": 0.9990730077230515, "No": 0.000925796601734302}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9690054332867708, "res": {"Yes": 0.9690054332867708, "No": 0.03099087725200126}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9589738197520129, "res": {"Yes": 0.9589738197520129, "No": 0.041023672534772605}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897212818265845, "res": {"Yes": 0.9897212818265845, "No": 0.010275886961893448}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894108767205152, "res": {"Yes": 0.9894108767205152, "No": 0.010587724658587974}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821493299793173, "res": {"Yes": 0.9821493299793173, "No": 0.017849449706485932}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9955152821153338, "res": {"Yes": 0.9955152821153338, "No": 0.004481848975294955}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970510212054915, "res": {"Yes": 0.9970510212054915, "No": 0.002947565805301291}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9820618302648197, "res": {"Yes": 0.9820618302648197, "No": 0.017936796603021735}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993420410912932, "res": {"Yes": 0.993420410912932, "No": 0.006578812638526135}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962681782192646, "res": {"Yes": 0.9962681782192646, "No": 0.0037313085095309084}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9693210713411577, "res": {"Yes": 0.9693210713411577, "No": 0.03067886186929198}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953567639061844, "res": {"Yes": 0.9953567639061844, "No": 0.004643160511026043}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954591713782643, "res": {"Yes": 0.9954591713782643, "No": 0.004540707691008496}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.974908199281683, "res": {"Yes": 0.974908199281683, "No": 0.025091433146698774}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9478094396124362, "res": {"Yes": 0.9478094396124362, "No": 0.05219006123596241}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04721488858534279, "res": {"No": 0.952778382109875, "Yes": 0.04721488858534279}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9674989338912461, "res": {"Yes": 0.9674989338912461, "No": 0.03249775724089715}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888741264683361, "res": {"Yes": 0.9888741264683361, "No": 0.011124437655807498}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9659307931866506, "res": {"Yes": 0.9659307931866506, "No": 0.03406641763617745}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.989155261805369, "res": {"Yes": 0.989155261805369, "No": 0.010844112207197488}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 6.858197492104411e-07, "res": {"No": 0.9999987335551019, "Yes": 6.858197492104411e-07}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998895490961502, "res": {"Yes": 0.9998895490961502, "No": 0.000110093010439215}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995589381463895, "res": {"Yes": 0.9995589381463895, "No": 0.0004407712608322561}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998796563318803, "res": {"Yes": 0.9998796563318803, "No": 0.00012001353858553204}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981162114610905, "res": {"Yes": 0.9981162114610905, "No": 0.0018827722259625796}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.30339563473274545, "res": {"No": 0.6965995409757458, "Yes": 0.30339563473274545}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5155910327200393, "res": {"Yes": 0.5155910327200393, "No": 0.4843991606823972}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4067587341889299, "res": {"No": 0.5932318667996453, "Yes": 0.4067587341889299}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.19409095297832007, "res": {"No": 0.805903742327571, "Yes": 0.19409095297832007}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7394603097568935, "res": {"Yes": 0.7394603097568935, "No": 0.26053426263698415}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9107712508627954, "res": {"Yes": 0.9107712508627954, "No": 0.08922763303343731}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957151104477243, "res": {"Yes": 0.9957151104477243, "No": 0.004284556212387927}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976796166611335, "res": {"Yes": 0.9976796166611335, "No": 0.0023201394206541727}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988824364398056, "res": {"Yes": 0.9988824364398056, "No": 0.0011174779088933993}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979328869270284, "res": {"Yes": 0.9979328869270284, "No": 0.002066564732085633}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9806789598797965, "res": {"Yes": 0.9806789598797965, "No": 0.019317188182455532}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.991939320915584, "res": {"Yes": 0.991939320915584, "No": 0.008059234517409019}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959664427332676, "res": {"Yes": 0.9959664427332676, "No": 0.004032977266409696}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9742193522949769, "res": {"Yes": 0.9742193522949769, "No": 0.025780084596463648}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960027453757323, "res": {"Yes": 0.9960027453757323, "No": 0.0039961535220595385}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9618731516521889, "res": {"Yes": 0.9618731516521889, "No": 0.038122683749943855}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.3867214548538042, "res": {"No": 0.6132307784972924, "Yes": 0.3867214548538042}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6630009539226163, "res": {"Yes": 0.6630009539226163, "No": 0.33699634310807547}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9550048939113829, "res": {"Yes": 0.9550048939113829, "No": 0.04497547220154953}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966843818420662, "res": {"Yes": 0.9966843818420662, "No": 0.003315410937434811}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.806966228331401, "res": {"Yes": 0.806966228331401, "No": 0.19303313902964941}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9911474952143957, "res": {"Yes": 0.9911474952143957, "No": 0.008852202330646894}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9821334634850534, "res": {"Yes": 0.9821334634850534, "No": 0.017866121734023417}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99140098485295, "res": {"Yes": 0.99140098485295, "No": 0.00859865915575281}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9903460524518354, "res": {"Yes": 0.9903460524518354, "No": 0.0096536905145558}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.602320249023395, "res": {"Yes": 0.602320249023395, "No": 0.39767830874839016}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972346136448029, "res": {"Yes": 0.9972346136448029, "No": 0.0027652387514889055}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939637597442676, "res": {"Yes": 0.9939637597442676, "No": 0.006035962203148306}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912833650780548, "res": {"Yes": 0.9912833650780548, "No": 0.008716190550401228}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986889583669738, "res": {"Yes": 0.9986889583669738, "No": 0.001310863760356482}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9797960423779467, "res": {"Yes": 0.9797960423779467, "No": 0.020203033754205228}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970885774347901, "res": {"Yes": 0.9970885774347901, "No": 0.0029107360256600324}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995959938496055, "res": {"Yes": 0.995959938496055, "No": 0.004038863251472081}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9861061159530201, "res": {"Yes": 0.9861061159530201, "No": 0.013892842493387363}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948075221617716, "res": {"Yes": 0.9948075221617716, "No": 0.005191456423877986}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9917728983711935, "res": {"Yes": 0.9917728983711935, "No": 0.008226751966232322}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976637133757247, "res": {"Yes": 0.9976637133757247, "No": 0.002336222430530826}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955989421643118, "res": {"Yes": 0.9955989421643118, "No": 0.004400631172720867}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9604077097716369, "res": {"Yes": 0.9604077097716369, "No": 0.039591643726361404}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987575668599399, "res": {"Yes": 0.9987575668599399, "No": 0.0012423937707832758}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6048294872086546, "res": {"Yes": 0.6048294872086546, "No": 0.39516621702447063}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904049867278623, "res": {"Yes": 0.9904049867278623, "No": 0.009594017997423208}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969157071340723, "res": {"Yes": 0.9969157071340723, "No": 0.0030840241270931453}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9746143877095834, "res": {"Yes": 0.9746143877095834, "No": 0.02538390924477858}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9338906357069401, "res": {"Yes": 0.9338906357069401, "No": 0.06610688349181931}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.45426153887143816, "res": {"No": 0.5457245050940701, "Yes": 0.45426153887143816}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.847123183364292, "res": {"Yes": 0.847123183364292, "No": 0.15287362183423475}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9737606546332934, "res": {"Yes": 0.9737606546332934, "No": 0.026237310928937167}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6852172073231032, "res": {"Yes": 0.6852172073231032, "No": 0.31475459729022903}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8720972988917578, "res": {"Yes": 0.8720972988917578, "No": 0.1278982184162282}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8649025477134507, "res": {"Yes": 0.8649025477134507, "No": 0.1350968973311875}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9707200493010736, "res": {"Yes": 0.9707200493010736, "No": 0.029279677625725375}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9643305779226715, "res": {"Yes": 0.9643305779226715, "No": 0.03566922524215043}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8946150544406076, "res": {"Yes": 0.8946150544406076, "No": 0.10538442754951044}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9542658477018183, "res": {"Yes": 0.9542658477018183, "No": 0.0457325628410175}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5150323318488353, "res": {"Yes": 0.5150323318488353, "No": 0.48496452259929357}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.379467016108044, "res": {"No": 0.6205298432540434, "Yes": 0.379467016108044}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48159551391088645, "res": {"No": 0.5184020393606183, "Yes": 0.48159551391088645}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6704789776471671, "res": {"Yes": 0.6704789776471671, "No": 0.32951799944214566}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.31497204189009587, "res": {"No": 0.6850265653082471, "Yes": 0.31497204189009587}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7825888082785041, "res": {"Yes": 0.7825888082785041, "No": 0.21740330543886746}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896463182271873, "res": {"Yes": 0.9896463182271873, "No": 0.010352777891172107}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9699042244888025, "res": {"Yes": 0.9699042244888025, "No": 0.030094970927714453}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8253397246121138, "res": {"Yes": 0.8253397246121138, "No": 0.17465235914085486}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8096308143258584, "res": {"Yes": 0.8096308143258584, "No": 0.19035037996822285}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9911441025223262, "res": {"Yes": 0.9911441025223262, "No": 0.00885500892360719}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9910869528187854, "res": {"Yes": 0.9910869528187854, "No": 0.008912651453553695}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980380834791422, "res": {"Yes": 0.9980380834791422, "No": 0.001960998297397645}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962768182928784, "res": {"Yes": 0.9962768182928784, "No": 0.0037227389676242344}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970159441717272, "res": {"Yes": 0.9970159441717272, "No": 0.0029835043887381065}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.952459886753654e-06, "res": {"No": 0.9999970647075079, "Yes": 1.952459886753654e-06}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9830322093434561, "res": {"Yes": 0.9830322093434561, "No": 0.016967127217708215}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946317692263817, "res": {"Yes": 0.9946317692263817, "No": 0.005367945672425792}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9798461729652675, "res": {"Yes": 0.9798461729652675, "No": 0.020153492107943302}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9701619816834379, "res": {"Yes": 0.9701619816834379, "No": 0.029836359850860836}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9758629300053827, "res": {"Yes": 0.9758629300053827, "No": 0.024135408123155393}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8639431076270593, "res": {"Yes": 0.8639431076270593, "No": 0.13605628752636353}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8412273774287008, "res": {"Yes": 0.8412273774287008, "No": 0.15877140748525526}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9791255270579737, "res": {"Yes": 0.9791255270579737, "No": 0.02087303175241514}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5129083011418034, "res": {"Yes": 0.5129083011418034, "No": 0.4870904715256292}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00425321122075321, "res": {"No": 0.9957463037035823, "Yes": 0.00425321122075321}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8652063894418701, "res": {"Yes": 0.8652063894418701, "No": 0.1347896046927754}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943013025299845, "res": {"Yes": 0.9943013025299845, "No": 0.005697102310401323}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.914352595533822, "res": {"Yes": 0.914352595533822, "No": 0.08564644167717565}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9436746761076771, "res": {"Yes": 0.9436746761076771, "No": 0.056322059498548696}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9792670356083243, "res": {"Yes": 0.9792670356083243, "No": 0.020732329198882373}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986894345419827, "res": {"Yes": 0.9986894345419827, "No": 0.00131029889981196}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9621167441636168, "res": {"Yes": 0.9621167441636168, "No": 0.03788267103812939}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995463118875902, "res": {"Yes": 0.9995463118875902, "No": 0.0004535062077360059}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950185087875728, "res": {"Yes": 0.9950185087875728, "No": 0.0049814363790762145}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8018145277643893, "res": {"Yes": 0.8018145277643893, "No": 0.19818080666387894}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8338682856258878, "res": {"Yes": 0.8338682856258878, "No": 0.16613011040733502}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9048738113287772, "res": {"Yes": 0.9048738113287772, "No": 0.09512120104973754}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9351861299873196, "res": {"Yes": 0.9351861299873196, "No": 0.06480708792804545}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.990019727781522, "res": {"Yes": 0.990019727781522, "No": 0.009977532137992181}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6784212101713757, "res": {"Yes": 0.6784212101713757, "No": 0.3215778880836258}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979534262936941, "res": {"Yes": 0.9979534262936941, "No": 0.0020463368227539103}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996769835110633, "res": {"Yes": 0.9996769835110633, "No": 0.00032254399900856647}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995185614287588, "res": {"Yes": 0.9995185614287588, "No": 0.00048115855788788685}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998686983108214, "res": {"Yes": 0.9998686983108214, "No": 0.00013094554607477813}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9721407310219778, "res": {"Yes": 0.9721407310219778, "No": 0.027857596081882877}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973811686375271, "res": {"Yes": 0.9973811686375271, "No": 0.0026182600104032124}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928937460827519, "res": {"Yes": 0.9928937460827519, "No": 0.007105582047201564}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962875890997824, "res": {"Yes": 0.9962875890997824, "No": 0.0037119460429761325}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.902853193675234, "res": {"Yes": 0.902853193675234, "No": 0.09714615324302676}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9135429749412138, "res": {"Yes": 0.9135429749412138, "No": 0.086456515709959}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9449703061930621, "res": {"Yes": 0.9449703061930621, "No": 0.05502905282686457}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972033155287143, "res": {"Yes": 0.9972033155287143, "No": 0.002796433845946892}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9651970447853366, "res": {"Yes": 0.9651970447853366, "No": 0.03480262374752326}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.3456923711954371, "res": {"No": 0.6543068552066577, "Yes": 0.3456923711954371}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8934187973910321, "res": {"Yes": 0.8934187973910321, "No": 0.10658000022525693}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987637503869295, "res": {"Yes": 0.9987637503869295, "No": 0.0012355105419927198}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964007183987919, "res": {"Yes": 0.9964007183987919, "No": 0.0035987399812302963}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986289144977684, "res": {"Yes": 0.9986289144977684, "No": 0.0013699697769580567}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953153079612266, "res": {"Yes": 0.9953153079612266, "No": 0.0046839875089440385}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00010975532366360077, "res": {"No": 0.9998864501472726, "Yes": 0.00010975532366360077}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971618362237029, "res": {"Yes": 0.9971618362237029, "No": 0.0028366060337489617}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6638532519702791, "res": {"Yes": 0.6638532519702791, "No": 0.33613708874901443}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891234204208419, "res": {"Yes": 0.9891234204208419, "No": 0.010873665741187472}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9541891087280616, "res": {"Yes": 0.9541891087280616, "No": 0.0458008146913253}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9976011845672145, "res": {"Yes": 0.9976011845672145, "No": 0.002398765617454211}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9047683091932702, "res": {"Yes": 0.9047683091932702, "No": 0.09523100570802638}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9749184027242913, "res": {"Yes": 0.9749184027242913, "No": 0.025080214766412368}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9862628625808754, "res": {"Yes": 0.9862628625808754, "No": 0.01373641422064078}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7122292279619374, "res": {"Yes": 0.7122292279619374, "No": 0.2877688284956172}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.29642637649359094, "res": {"No": 0.7035726066544092, "Yes": 0.29642637649359094}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829425962799283, "res": {"Yes": 0.9829425962799283, "No": 0.017056812413661766}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923563669815593, "res": {"Yes": 0.9923563669815593, "No": 0.007640387172950396}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972156415364736, "res": {"Yes": 0.9972156415364736, "No": 0.0027835477383827347}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917281198439645, "res": {"Yes": 0.9917281198439645, "No": 0.008270903711831232}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9982444938067492, "res": {"Yes": 0.9982444938067492, "No": 0.0017546260880604676}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999163561210113, "res": {"Yes": 0.999163561210113, "No": 0.0008360963502711536}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997519228962147, "res": {"Yes": 0.9997519228962147, "No": 0.00024737157487973895}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982336763889036, "res": {"Yes": 0.9982336763889036, "No": 0.001766069187584716}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9764662387459062, "res": {"Yes": 0.9764662387459062, "No": 0.02352600203367899}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.01770444749609139, "res": {"No": 0.9822942381410786, "Yes": 0.01770444749609139}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964700854485559, "res": {"Yes": 0.9964700854485559, "No": 0.0035293519798578346}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989627280388621, "res": {"Yes": 0.9989627280388621, "No": 0.0010370885114520934}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962735115559785, "res": {"Yes": 0.9962735115559785, "No": 0.0037258807952538922}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995998960572491, "res": {"Yes": 0.995998960572491, "No": 0.003999921272223752}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.05240101068287618, "res": {"No": 0.9475977700131812, "Yes": 0.05240101068287618}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939829633096117, "res": {"Yes": 0.9939829633096117, "No": 0.006016742975759905}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998772798807299, "res": {"Yes": 0.9998772798807299, "No": 0.00012244182974320363}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995179656559027, "res": {"Yes": 0.9995179656559027, "No": 0.00048187471745107693}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962975322987967, "res": {"Yes": 0.9962975322987967, "No": 0.003702267904297044}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07959054297791507, "res": {"No": 0.9204044315718491, "Yes": 0.07959054297791507}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9381416636818761, "res": {"Yes": 0.9381416636818761, "No": 0.06185591810000195}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9869365503250538, "res": {"Yes": 0.9869365503250538, "No": 0.013062104855647695}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959112233030607, "res": {"Yes": 0.9959112233030607, "No": 0.004087894228786405}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946650224110125, "res": {"Yes": 0.9946650224110125, "No": 0.00533358691488409}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9686038831377836, "res": {"Yes": 0.9686038831377836, "No": 0.031395129800260664}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.997905109652129, "res": {"Yes": 0.997905109652129, "No": 0.0020938441421223543}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936398668167629, "res": {"Yes": 0.9936398668167629, "No": 0.006358791089277097}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981012495117882, "res": {"Yes": 0.9981012495117882, "No": 0.0018978428118611177}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977222098101242, "res": {"Yes": 0.9977222098101242, "No": 0.0022767511190601155}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5203922108899992, "res": {"Yes": 0.5203922108899992, "No": 0.4796071536848333}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9803546157150352, "res": {"Yes": 0.9803546157150352, "No": 0.019645375733307442}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9808605866720508, "res": {"Yes": 0.9808605866720508, "No": 0.019139207485611227}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9482252494932645, "res": {"Yes": 0.9482252494932645, "No": 0.05177402517397002}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.975048837007746, "res": {"Yes": 0.975048837007746, "No": 0.024951016902498704}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02197801815289934, "res": {"No": 0.9780191190573568, "Yes": 0.02197801815289934}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.3080169392364282, "res": {"No": 0.6919748693445447, "Yes": 0.3080169392364282}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.00918269081681603, "res": {"No": 0.9908133693181551, "Yes": 0.00918269081681603}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.049532453399640866, "res": {"No": 0.9504609353037813, "Yes": 0.049532453399640866}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.11022428880173042, "res": {"No": 0.8897716163756646, "Yes": 0.11022428880173042}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8244400502966459, "res": {"Yes": 0.8244400502966459, "No": 0.1755588613499915}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.953271453365691, "res": {"Yes": 0.953271453365691, "No": 0.04672834682791076}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977334867289019, "res": {"Yes": 0.9977334867289019, "No": 0.0022665160640646356}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979565119704586, "res": {"Yes": 0.9979565119704586, "No": 0.0020433942617463802}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9496368448972508, "res": {"Yes": 0.9496368448972508, "No": 0.05036136829001768}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9576139683625428, "res": {"Yes": 0.9576139683625428, "No": 0.04238197182376759}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944609111382903, "res": {"Yes": 0.9944609111382903, "No": 0.005537747219513612}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996493412451575, "res": {"Yes": 0.9996493412451575, "No": 0.000349089577374772}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.969172576912511, "res": {"Yes": 0.969172576912511, "No": 0.030821955562997643}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820436632710577, "res": {"Yes": 0.9820436632710577, "No": 0.0179531713997868}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9569466243629674, "res": {"Yes": 0.9569466243629674, "No": 0.04305222350848601}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9834943607776656, "res": {"Yes": 0.9834943607776656, "No": 0.016504249265312777}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9840248078677188, "res": {"Yes": 0.9840248078677188, "No": 0.015973867072703457}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986883631485319, "res": {"Yes": 0.9986883631485319, "No": 0.0013110472941299963}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743806116075214, "res": {"Yes": 0.9743806116075214, "No": 0.025617775832546836}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.019041337599726583, "res": {"No": 0.9809579772309386, "Yes": 0.019041337599726583}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985907582150917, "res": {"Yes": 0.9985907582150917, "No": 0.0014089620224182467}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998717971986816, "res": {"Yes": 0.9998717971986816, "No": 0.000127909701503557}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985266952805757, "res": {"Yes": 0.9985266952805757, "No": 0.0014729742838564053}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987673147813623, "res": {"Yes": 0.9987673147813623, "No": 0.0012322198254982126}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.004667706693371062, "res": {"No": 0.9953311336999697, "Yes": 0.004667706693371062}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966875714365286, "res": {"Yes": 0.9966875714365286, "No": 0.0033121481903397943}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998350582187204, "res": {"Yes": 0.998350582187204, "No": 0.0016491544120114116}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42947288976672776, "res": {"No": 0.570526152136842, "Yes": 0.42947288976672776}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9097344320271112, "res": {"Yes": 0.9097344320271112, "No": 0.09026474640927991}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.051837896735140256, "res": {"No": 0.9481607173752228, "Yes": 0.051837896735140256}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.07055574305194882, "res": {"No": 0.9294428540710294, "Yes": 0.07055574305194882}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936268035193057, "res": {"Yes": 0.9936268035193057, "No": 0.0063707027096227015}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9906141100365068, "res": {"Yes": 0.9906141100365068, "No": 0.009383801695944277}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8311438786033478, "res": {"Yes": 0.8311438786033478, "No": 0.16885132246380097}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.93539447525427, "res": {"Yes": 0.93539447525427, "No": 0.06460428752758147}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968654715244383, "res": {"Yes": 0.9968654715244383, "No": 0.0031326953838584637}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993837775031184, "res": {"Yes": 0.9993837775031184, "No": 0.0006145915236598969}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989064560554377, "res": {"Yes": 0.9989064560554377, "No": 0.0010923007800278492}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9790721563801248, "res": {"Yes": 0.9790721563801248, "No": 0.02092411410879636}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07569675394298049, "res": {"No": 0.924302342836721, "Yes": 0.07569675394298049}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998679799033063, "res": {"Yes": 0.998679799033063, "No": 0.001319626680747278}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933421791558446, "res": {"Yes": 0.9933421791558446, "No": 0.006657082190249662}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9438948969611983, "res": {"Yes": 0.9438948969611983, "No": 0.05610252267475804}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959846564369351, "res": {"Yes": 0.9959846564369351, "No": 0.004014477809268397}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9906818525267364, "res": {"Yes": 0.9906818525267364, "No": 0.009317776050035892}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8460970920030253, "res": {"Yes": 0.8460970920030253, "No": 0.15390185268048237}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998704861276457, "res": {"Yes": 0.9998704861276457, "No": 0.0001294103751770471}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953606611336874, "res": {"Yes": 0.9953606611336874, "No": 0.004638644913043486}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9531713641646412, "res": {"Yes": 0.9531713641646412, "No": 0.046827108607754825}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9518387897767021, "res": {"Yes": 0.9518387897767021, "No": 0.048152982538904196}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984156866515381, "res": {"Yes": 0.9984156866515381, "No": 0.0015831964594556721}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978664155340149, "res": {"Yes": 0.9978664155340149, "No": 0.0021332353671233116}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986172669574874, "res": {"Yes": 0.9986172669574874, "No": 0.0013815516878564178}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938026741344683, "res": {"Yes": 0.9938026741344683, "No": 0.006197028351353278}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9099274481111683, "res": {"Yes": 0.9099274481111683, "No": 0.09007062447427155}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9854237177521677, "res": {"Yes": 0.9854237177521677, "No": 0.014574381256950765}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9868465764081755, "res": {"Yes": 0.9868465764081755, "No": 0.013152814668899962}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9844742595039692, "res": {"Yes": 0.9844742595039692, "No": 0.015524555233170334}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7793480984250094, "res": {"Yes": 0.7793480984250094, "No": 0.22064603788234452}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03243540519083662, "res": {"No": 0.9675637739724369, "Yes": 0.03243540519083662}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8790278570914393, "res": {"Yes": 0.8790278570914393, "No": 0.12097156028237795}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941653295911187, "res": {"Yes": 0.9941653295911187, "No": 0.00583432325652391}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9674547472889903, "res": {"Yes": 0.9674547472889903, "No": 0.03254503033058029}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8360270332473259, "res": {"Yes": 0.8360270332473259, "No": 0.16397104854565}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.01633196579306698, "res": {"No": 0.9836671188655253, "Yes": 0.01633196579306698}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8620205402552176, "res": {"Yes": 0.8620205402552176, "No": 0.13797814167913286}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967240446895237, "res": {"Yes": 0.9967240446895237, "No": 0.0032755666564230398}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9920348115744811, "res": {"Yes": 0.9920348115744811, "No": 0.007963497429959474}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9598034414247303, "res": {"Yes": 0.9598034414247303, "No": 0.04019456867789717}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8531995327749601, "res": {"Yes": 0.8531995327749601, "No": 0.14679948074021112}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.981371002341473, "res": {"Yes": 0.981371002341473, "No": 0.018628174244556383}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921262159204701, "res": {"Yes": 0.9921262159204701, "No": 0.007872914143669562}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944370935874604, "res": {"Yes": 0.9944370935874604, "No": 0.005562386701315757}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9308257570388305, "res": {"Yes": 0.9308257570388305, "No": 0.06917200847484753}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5832038481340756, "res": {"Yes": 0.5832038481340756, "No": 0.4167938266304324}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990720550074856, "res": {"Yes": 0.9990720550074856, "No": 0.0009274496969943316}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996931864469076, "res": {"Yes": 0.9996931864469076, "No": 0.00030600136354267754}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993703232296777, "res": {"Yes": 0.9993703232296777, "No": 0.000629378583980759}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996964002957203, "res": {"Yes": 0.9996964002957203, "No": 0.00030334675525712346}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8411451430599648, "res": {"Yes": 0.8411451430599648, "No": 0.1588534182134282}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9878293478510884, "res": {"Yes": 0.9878293478510884, "No": 0.012169980187045607}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723852629425338, "res": {"Yes": 0.9723852629425338, "No": 0.027612873053908846}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9646177848291244, "res": {"Yes": 0.9646177848291244, "No": 0.03538112475694068}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926149378776443, "res": {"Yes": 0.9926149378776443, "No": 0.007382137855518483}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8446315180890138, "res": {"Yes": 0.8446315180890138, "No": 0.15535914155123373}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9298004367510144, "res": {"Yes": 0.9298004367510144, "No": 0.07019622109183019}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685324221795129, "res": {"Yes": 0.9685324221795129, "No": 0.031464863505228}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7886811018530341, "res": {"Yes": 0.7886811018530341, "No": 0.2113116333065238}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7295867507462845, "res": {"Yes": 0.7295867507462845, "No": 0.27040498481947084}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.023435008146182417, "res": {"No": 0.9765635504081002, "Yes": 0.023435008146182417}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.975886433947097, "res": {"Yes": 0.975886433947097, "No": 0.02411325716247903}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8743315652437169, "res": {"Yes": 0.8743315652437169, "No": 0.12566807165326832}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9715126219699431, "res": {"Yes": 0.9715126219699431, "No": 0.028486642095057618}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9274445691556997, "res": {"Yes": 0.9274445691556997, "No": 0.07255501335295675}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012990707302667718, "res": {"No": 0.9870085420764868, "Yes": 0.012990707302667718}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.16265808814520474, "res": {"No": 0.8373403753883252, "Yes": 0.16265808814520474}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727657189085245, "res": {"Yes": 0.9727657189085245, "No": 0.027233928818035828}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8306834776552124, "res": {"Yes": 0.8306834776552124, "No": 0.16931620106555428}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5552001497871741, "res": {"Yes": 0.5552001497871741, "No": 0.4447989040345062}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9781723915523782, "res": {"Yes": 0.9781723915523782, "No": 0.02182712754481441}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9634238394217979, "res": {"Yes": 0.9634238394217979, "No": 0.036575659529124165}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9862132355753753, "res": {"Yes": 0.9862132355753753, "No": 0.013785753157244401}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41093184725493487, "res": {"No": 0.5890662340234777, "Yes": 0.41093184725493487}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5786518775417864, "res": {"Yes": 0.5786518775417864, "No": 0.4213465182998015}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9744386747293214, "res": {"Yes": 0.9744386747293214, "No": 0.025561026241126227}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6719503131214244, "res": {"Yes": 0.6719503131214244, "No": 0.32804758660776717}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9399338016260823, "res": {"Yes": 0.9399338016260823, "No": 0.06006480597887885}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9804726369268156, "res": {"Yes": 0.9804726369268156, "No": 0.01952714363574647}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9518014279819731, "res": {"Yes": 0.9518014279819731, "No": 0.04819703376328819}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.055585283389536624, "res": {"No": 0.9444133706933733, "Yes": 0.055585283389536624}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7374272497799552, "res": {"Yes": 0.7374272497799552, "No": 0.2625699385147853}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6656217917513753, "res": {"Yes": 0.6656217917513753, "No": 0.3343689683747441}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7553847400949713, "res": {"Yes": 0.7553847400949713, "No": 0.24461246408888498}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.010504429012935973, "res": {"No": 0.9894923362923297, "Yes": 0.010504429012935973}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0041538569118206685, "res": {"No": 0.9958439539629048, "Yes": 0.0041538569118206685}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972109047734261, "res": {"Yes": 0.9972109047734261, "No": 0.002788753101698367}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923403959282387, "res": {"Yes": 0.9923403959282387, "No": 0.0076586748980868044}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821929148050373, "res": {"Yes": 0.9821929148050373, "No": 0.01780657870138849}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958667733910492, "res": {"Yes": 0.9958667733910492, "No": 0.004132441022492891}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9407970269240761, "res": {"Yes": 0.9407970269240761, "No": 0.05920122764858577}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895624947755891, "res": {"Yes": 0.9895624947755891, "No": 0.010437032293633205}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988360513143608, "res": {"Yes": 0.9988360513143608, "No": 0.0011635320842130759}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986328392169774, "res": {"Yes": 0.9986328392169774, "No": 0.001367124568410852}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984702391787551, "res": {"Yes": 0.9984702391787551, "No": 0.0015294623661569905}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9990509977277514, "res": {"Yes": 0.9990509977277514, "No": 0.0009485623835582784}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9636492857786134, "res": {"Yes": 0.9636492857786134, "No": 0.036348984940517665}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9392874904686814, "res": {"Yes": 0.9392874904686814, "No": 0.06071158757753128}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942166903934554, "res": {"Yes": 0.9942166903934554, "No": 0.005782749600399636}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.019420268310883667, "res": {"No": 0.9805785405539758, "Yes": 0.019420268310883667}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1212280616961381, "res": {"No": 0.8787704802607235, "Yes": 0.1212280616961381}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974140144363098, "res": {"Yes": 0.9974140144363098, "No": 0.0025843395419778855}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.963210990586549, "res": {"Yes": 0.963210990586549, "No": 0.036787660593332425}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8753519280508307, "res": {"Yes": 0.8753519280508307, "No": 0.1246438635084709}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.981610773693787, "res": {"Yes": 0.981610773693787, "No": 0.018387920950859518}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.18116124281514653, "res": {"No": 0.8188382267299723, "Yes": 0.18116124281514653}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901189356986875, "res": {"Yes": 0.9901189356986875, "No": 0.009879977940181554}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985415409865768, "res": {"Yes": 0.9985415409865768, "No": 0.0014582196575316701}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987431758658392, "res": {"Yes": 0.9987431758658392, "No": 0.0012565461156283974}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979301544907315, "res": {"Yes": 0.9979301544907315, "No": 0.002069163209609475}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5319344880427355, "res": {"Yes": 0.5319344880427355, "No": 0.4680626146624788}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989372672009041, "res": {"Yes": 0.9989372672009041, "No": 0.001062696216354379}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9892195322780943, "res": {"Yes": 0.9892195322780943, "No": 0.010779932231756873}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925590321031273, "res": {"Yes": 0.9925590321031273, "No": 0.0074406133054973796}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8196154763393312, "res": {"Yes": 0.8196154763393312, "No": 0.1803831642978308}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7972222905366235, "res": {"Yes": 0.7972222905366235, "No": 0.2027771066456351}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.558057709447267, "res": {"Yes": 0.558057709447267, "No": 0.44194187945336166}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9802894243007287, "res": {"Yes": 0.9802894243007287, "No": 0.01970974688220838}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9879698933661766, "res": {"Yes": 0.9879698933661766, "No": 0.01202917175092968}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9694281571853118, "res": {"Yes": 0.9694281571853118, "No": 0.030571725513633262}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6126991785233493, "res": {"Yes": 0.6126991785233493, "No": 0.38729967174845115}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973963467001633, "res": {"Yes": 0.9973963467001633, "No": 0.0026024769874308223}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940027531110223, "res": {"Yes": 0.9940027531110223, "No": 0.005995898061322295}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9374549552736555, "res": {"Yes": 0.9374549552736555, "No": 0.06254424829589098}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9542164590424542, "res": {"Yes": 0.9542164590424542, "No": 0.045781509137748075}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8318783356863159, "res": {"Yes": 0.8318783356863159, "No": 0.1681209026126816}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953714110868773, "res": {"Yes": 0.9953714110868773, "No": 0.004627631948179311}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980886636341955, "res": {"Yes": 0.9980886636341955, "No": 0.0019110489484534395}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975606216308449, "res": {"Yes": 0.9975606216308449, "No": 0.002439194297864991}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9882845110884534, "res": {"Yes": 0.9882845110884534, "No": 0.01171500242526741}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9496489927294681, "res": {"Yes": 0.9496489927294681, "No": 0.050350672671307814}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988518688075432, "res": {"Yes": 0.9988518688075432, "No": 0.001148135794716122}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.988894862387299, "res": {"Yes": 0.988894862387299, "No": 0.011104870312522233}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985745914611612, "res": {"Yes": 0.9985745914611612, "No": 0.001425145165391201}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993386538558287, "res": {"Yes": 0.9993386538558287, "No": 0.0006610989710811173}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7345198048609483, "res": {"Yes": 0.7345198048609483, "No": 0.26547598350499096}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9654017664478118, "res": {"Yes": 0.9654017664478118, "No": 0.034595024771113855}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.888082166880318, "res": {"Yes": 0.888082166880318, "No": 0.11191087314225902}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9793986373753611, "res": {"Yes": 0.9793986373753611, "No": 0.020599822906405215}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8047336869197342, "res": {"Yes": 0.8047336869197342, "No": 0.19526292407644152}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8420800230533451, "res": {"Yes": 0.8420800230533451, "No": 0.15791703165255444}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9693659829654497, "res": {"Yes": 0.9693659829654497, "No": 0.030632568682451143}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685940344235708, "res": {"Yes": 0.9685940344235708, "No": 0.03140535057835203}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9603948471173149, "res": {"Yes": 0.9603948471173149, "No": 0.039604105177348546}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6531673982206408, "res": {"Yes": 0.6531673982206408, "No": 0.3468297685350293}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9718915916170017, "res": {"Yes": 0.9718915916170017, "No": 0.02810789166220043}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9019092082750743, "res": {"Yes": 0.9019092082750743, "No": 0.09808967183122493}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961656141035146, "res": {"Yes": 0.9961656141035146, "No": 0.0038340714447968574}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9667533338380292, "res": {"Yes": 0.9667533338380292, "No": 0.033246206786403526}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9784259099494833, "res": {"Yes": 0.9784259099494833, "No": 0.021572994524636745}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9863739555684456, "res": {"Yes": 0.9863739555684456, "No": 0.013623105417882126}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950298333566685, "res": {"Yes": 0.9950298333566685, "No": 0.004969366061635595}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916888372794253, "res": {"Yes": 0.9916888372794253, "No": 0.00830896688287882}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9648260335585219, "res": {"Yes": 0.9648260335585219, "No": 0.03515558791361766}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923825462832454, "res": {"Yes": 0.9923825462832454, "No": 0.007613818187630982}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9978473016856746, "res": {"Yes": 0.9978473016856746, "No": 0.002152379079524878}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9749819704513845, "res": {"Yes": 0.9749819704513845, "No": 0.02501706467426163}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9098072546342267, "res": {"Yes": 0.9098072546342267, "No": 0.09019177881728127}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903632405519024, "res": {"Yes": 0.9903632405519024, "No": 0.009636304446804932}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910001948822988, "res": {"Yes": 0.9910001948822988, "No": 0.008998986671863869}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.015828645090549806, "res": {"No": 0.9841705045095491, "Yes": 0.015828645090549806}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9645836186722719, "res": {"Yes": 0.9645836186722719, "No": 0.035415659425525714}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873837970543359, "res": {"Yes": 0.9873837970543359, "No": 0.01261543770884972}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9804289764719497, "res": {"Yes": 0.9804289764719497, "No": 0.0195708669227273}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9481086477147399, "res": {"Yes": 0.9481086477147399, "No": 0.05189064788195774}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.99929603739802, "res": {"Yes": 0.99929603739802, "No": 0.0007024909776792903}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988304590467317, "res": {"Yes": 0.9988304590467317, "No": 0.0011691000665820012}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979168644448096, "res": {"Yes": 0.9979168644448096, "No": 0.002082152634494104}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950321950627807, "res": {"Yes": 0.9950321950627807, "No": 0.00496664856170387}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996087262075636, "res": {"Yes": 0.9996087262075636, "No": 0.0003905404534765355}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9657659924074843, "res": {"Yes": 0.9657659924074843, "No": 0.03423372533174629}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9805064383231323, "res": {"Yes": 0.9805064383231323, "No": 0.01949313552005907}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983782635380456, "res": {"Yes": 0.9983782635380456, "No": 0.0016215269693639848}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.76160036214468, "res": {"Yes": 0.76160036214468, "No": 0.23839900916181975}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.014220509861652528, "res": {"No": 0.985778755953874, "Yes": 0.014220509861652528}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9856970875671028, "res": {"Yes": 0.9856970875671028, "No": 0.014302785077719342}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9000867692745009, "res": {"Yes": 0.9000867692745009, "No": 0.09990945032039172}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9365691697265774, "res": {"Yes": 0.9365691697265774, "No": 0.06342993297585892}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905440443972463, "res": {"Yes": 0.9905440443972463, "No": 0.009454820138032853}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.21012846441267094, "res": {"No": 0.7898586212337595, "Yes": 0.21012846441267094}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9881201315383005, "res": {"Yes": 0.9881201315383005, "No": 0.01187827710970862}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970337163387311, "res": {"Yes": 0.9970337163387311, "No": 0.002965356552405376}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934506487062118, "res": {"Yes": 0.9934506487062118, "No": 0.006546967589308502}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9372717569799173, "res": {"Yes": 0.9372717569799173, "No": 0.06272650458566233}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9133485242645556, "res": {"Yes": 0.9133485242645556, "No": 0.08665005743341725}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946192708615595, "res": {"Yes": 0.9946192708615595, "No": 0.005378636906577383}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999055631297062, "res": {"Yes": 0.999055631297062, "No": 0.0009440075443070682}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990042474908796, "res": {"Yes": 0.9990042474908796, "No": 0.0009946868439917366}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913651314846552, "res": {"Yes": 0.9913651314846552, "No": 0.008630158568099837}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9971888610716951, "res": {"Yes": 0.9971888610716951, "No": 0.0028100696766023515}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9770114631489972, "res": {"Yes": 0.9770114631489972, "No": 0.0229878928379325}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9141435709954556, "res": {"Yes": 0.9141435709954556, "No": 0.0858528667200403}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953928998945403, "res": {"Yes": 0.9953928998945403, "No": 0.004606332001315045}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985313300528409, "res": {"Yes": 0.9985313300528409, "No": 0.001468302510788367}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9307310466134129, "res": {"Yes": 0.9307310466134129, "No": 0.06926767027763994}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.957736960706933, "res": {"Yes": 0.957736960706933, "No": 0.04226035358220318}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9908369024059559, "res": {"Yes": 0.9908369024059559, "No": 0.009162721843022427}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761592118278393, "res": {"Yes": 0.9761592118278393, "No": 0.023839354821684457}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9733086133048243, "res": {"Yes": 0.9733086133048243, "No": 0.026690395430377906}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3267376577347299, "res": {"No": 0.6732592015661613, "Yes": 0.3267376577347299}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5568225898130319, "res": {"Yes": 0.5568225898130319, "No": 0.4431711945090822}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4261128464593886, "res": {"No": 0.5738833077115002, "Yes": 0.4261128464593886}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38715414800516307, "res": {"No": 0.6128312294242947, "Yes": 0.38715414800516307}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5560909271849636, "res": {"Yes": 0.5560909271849636, "No": 0.4439043356555727}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9707522864784701, "res": {"Yes": 0.9707522864784701, "No": 0.029246598577937453}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.969054122187943, "res": {"Yes": 0.969054122187943, "No": 0.03094553361370744}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957214870277094, "res": {"Yes": 0.9957214870277094, "No": 0.004278376345034164}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707656586833172, "res": {"Yes": 0.9707656586833172, "No": 0.029234066171673453}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8154561166148919, "res": {"Yes": 0.8154561166148919, "No": 0.18454343927297542}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9957338987734012, "res": {"Yes": 0.9957338987734012, "No": 0.004265506703731287}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904314180184275, "res": {"Yes": 0.9904314180184275, "No": 0.00956744585446002}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9746467727345001, "res": {"Yes": 0.9746467727345001, "No": 0.025352531297125922}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8654787300500255, "res": {"Yes": 0.8654787300500255, "No": 0.1345196626390694}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7286792209099763, "res": {"Yes": 0.7286792209099763, "No": 0.2713191162862225}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9184108127696728, "res": {"Yes": 0.9184108127696728, "No": 0.08158857818763374}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9319721981990564, "res": {"Yes": 0.9319721981990564, "No": 0.06802655060080244}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9763419073570282, "res": {"Yes": 0.9763419073570282, "No": 0.02365727027689883}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8963832169095443, "res": {"Yes": 0.8963832169095443, "No": 0.10361556698207341}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904826375226932, "res": {"Yes": 0.9904826375226932, "No": 0.00951661557703777}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5989175119193698, "res": {"Yes": 0.5989175119193698, "No": 0.40108158038923114}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9397203668961639, "res": {"Yes": 0.9397203668961639, "No": 0.06027816180292106}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9438292711743815, "res": {"Yes": 0.9438292711743815, "No": 0.05616992543088047}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8913877241424031, "res": {"Yes": 0.8913877241424031, "No": 0.10860724076435156}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8940959436928584, "res": {"Yes": 0.8940959436928584, "No": 0.10590058524470593}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9436546185155993, "res": {"Yes": 0.9436546185155993, "No": 0.056344722421426255}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9630012270399848, "res": {"Yes": 0.9630012270399848, "No": 0.036997756622515546}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916144009339752, "res": {"Yes": 0.9916144009339752, "No": 0.008384345020465521}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838583948090228, "res": {"Yes": 0.9838583948090228, "No": 0.01613940470548344}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9813459688678651, "res": {"Yes": 0.9813459688678651, "No": 0.01865308371335423}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9838721276003385, "res": {"Yes": 0.9838721276003385, "No": 0.016127017626558698}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927698901150384, "res": {"Yes": 0.9927698901150384, "No": 0.007229748709893977}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987237922466345, "res": {"Yes": 0.9987237922466345, "No": 0.0012759485706564325}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987251017741294, "res": {"Yes": 0.9987251017741294, "No": 0.0012746712203718729}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9928139524046389, "res": {"Yes": 0.9928139524046389, "No": 0.007185662914736108}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.897147000674182, "res": {"Yes": 0.897147000674182, "No": 0.10285130238578008}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9361228791294032, "res": {"Yes": 0.9361228791294032, "No": 0.06387624113257975}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6108213573848676, "res": {"Yes": 0.6108213573848676, "No": 0.3891736242922442}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.92500846977326, "res": {"Yes": 0.92500846977326, "No": 0.07498805835922315}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.01584692664474695, "res": {"No": 0.9841502937915929, "Yes": 0.01584692664474695}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9919824900622232, "res": {"Yes": 0.9919824900622232, "No": 0.008016931791023403}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9773841615041118, "res": {"Yes": 0.9773841615041118, "No": 0.022614664758908262}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970446251437058, "res": {"Yes": 0.9970446251437058, "No": 0.0029547752161566154}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735674076390121, "res": {"Yes": 0.9735674076390121, "No": 0.026431489382134996}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938140886200134, "res": {"Yes": 0.9938140886200134, "No": 0.006185409912939543}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.006303085522527886, "res": {"No": 0.9936940070579087, "Yes": 0.006303085522527886}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986907368338543, "res": {"Yes": 0.9986907368338543, "No": 0.0013089465834890928}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990632494649752, "res": {"Yes": 0.9990632494649752, "No": 0.0009358647404455245}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981605147306538, "res": {"Yes": 0.9981605147306538, "No": 0.0018380262009661506}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992816313204188, "res": {"Yes": 0.9992816313204188, "No": 0.0007179554774221847}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.053241335566387826, "res": {"No": 0.9467576821502377, "Yes": 0.053241335566387826}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953358617331506, "res": {"Yes": 0.9953358617331506, "No": 0.00466322644616841}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9619461740623, "res": {"Yes": 0.9619461740623, "No": 0.03805292214956386}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9617017832470545, "res": {"Yes": 0.9617017832470545, "No": 0.03829661760043329}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.0008658585499352831, "res": {"No": 0.9991302492526368, "Yes": 0.0008658585499352831}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8603904423758839, "res": {"Yes": 0.8603904423758839, "No": 0.13960875161039002}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952692531001255, "res": {"Yes": 0.9952692531001255, "No": 0.004729338208043546}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982941359428829, "res": {"Yes": 0.9982941359428829, "No": 0.0017050850274806188}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9918864281534749, "res": {"Yes": 0.9918864281534749, "No": 0.008111236146231215}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5479179113836165, "res": {"Yes": 0.5479179113836165, "No": 0.4520756255416255}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9797581632153802, "res": {"Yes": 0.9797581632153802, "No": 0.02024089937553407}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9774289218366797, "res": {"Yes": 0.9774289218366797, "No": 0.02257053916204604}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947847483933273, "res": {"Yes": 0.9947847483933273, "No": 0.005214399217219427}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966643618432945, "res": {"Yes": 0.9966643618432945, "No": 0.0033352474648953097}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.727738717310835, "res": {"Yes": 0.727738717310835, "No": 0.2722589073949901}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8243488310518784, "res": {"Yes": 0.8243488310518784, "No": 0.17564796251929551}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9343722645306375, "res": {"Yes": 0.9343722645306375, "No": 0.06562399975598952}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929999888158058, "res": {"Yes": 0.9929999888158058, "No": 0.006999104966712969}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9816544337670959, "res": {"Yes": 0.9816544337670959, "No": 0.018344251927706248}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9537667674381234, "res": {"Yes": 0.9537667674381234, "No": 0.04623068610368922}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9932143207045606, "res": {"Yes": 0.9932143207045606, "No": 0.006783805848039641}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9514239270442049, "res": {"Yes": 0.9514239270442049, "No": 0.04857375084769446}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993376758992319, "res": {"Yes": 0.993376758992319, "No": 0.006622131380836887}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911979716770083, "res": {"Yes": 0.9911979716770083, "No": 0.008800633248765418}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9835137348239027, "res": {"Yes": 0.9835137348239027, "No": 0.01648293624771848}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0005912574605330644, "res": {"No": 0.9994078238055734, "Yes": 0.0005912574605330644}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9193728832577405, "res": {"Yes": 0.9193728832577405, "No": 0.08062616938269268}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9501531470110607, "res": {"Yes": 0.9501531470110607, "No": 0.04984598138331578}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9708930625617875, "res": {"Yes": 0.9708930625617875, "No": 0.029106592036352262}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9666451630923365, "res": {"Yes": 0.9666451630923365, "No": 0.03335451945199474}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.959782142662883, "res": {"Yes": 0.959782142662883, "No": 0.04021392300147062}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9128539575991156, "res": {"Yes": 0.9128539575991156, "No": 0.08714530335301302}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933210072862675, "res": {"Yes": 0.9933210072862675, "No": 0.006677540819549772}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867952548128289, "res": {"Yes": 0.9867952548128289, "No": 0.013203707732578358}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9773843853251104, "res": {"Yes": 0.9773843853251104, "No": 0.022614757479223847}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5488706694483616, "res": {"Yes": 0.5488706694483616, "No": 0.4511271726318863}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.30411863712634635, "res": {"No": 0.6958796362676465, "Yes": 0.30411863712634635}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6111847086578367, "res": {"Yes": 0.6111847086578367, "No": 0.38881287197977893}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6840424170851142, "res": {"Yes": 0.6840424170851142, "No": 0.3159541723882662}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8329641957766355, "res": {"Yes": 0.8329641957766355, "No": 0.16703286324950714}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9870827560647142, "res": {"Yes": 0.9870827560647142, "No": 0.01291421030333367}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.98416368816824, "res": {"Yes": 0.98416368816824, "No": 0.015834811548652326}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.625032612879435, "res": {"Yes": 0.625032612879435, "No": 0.3749635978963292}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.93989050927221, "res": {"Yes": 0.93989050927221, "No": 0.0601065054222614}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7329277349634594, "res": {"Yes": 0.7329277349634594, "No": 0.2670688736769516}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7054604985306784, "res": {"Yes": 0.7054604985306784, "No": 0.29453886946461066}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973702563498569, "res": {"Yes": 0.9973702563498569, "No": 0.002629682927358846}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990154306068197, "res": {"Yes": 0.9990154306068197, "No": 0.0009844750529729562}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992179565554986, "res": {"Yes": 0.9992179565554986, "No": 0.0007817785825738877}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.11204928049958932, "res": {"No": 0.887950083362111, "Yes": 0.11204928049958932}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9403712733969436, "res": {"Yes": 0.9403712733969436, "No": 0.05962829912902199}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960547855878508, "res": {"Yes": 0.9960547855878508, "No": 0.003944384330926184}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960636538019014, "res": {"Yes": 0.9960636538019014, "No": 0.003935056653046573}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991903426256842, "res": {"Yes": 0.991903426256842, "No": 0.008095454885826622}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986819383073517, "res": {"Yes": 0.9986819383073517, "No": 0.001315959791654244}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.05930863903684434, "res": {"No": 0.9406901556299911, "Yes": 0.05930863903684434}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992675938404221, "res": {"Yes": 0.9992675938404221, "No": 0.0007315415778158238}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979466566995876, "res": {"Yes": 0.9979466566995876, "No": 0.0020520602305181845}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991249865222799, "res": {"Yes": 0.991249865222799, "No": 0.008749877313720492}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962201493877613, "res": {"Yes": 0.9962201493877613, "No": 0.0037784817631269815}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8504940270838341, "res": {"Yes": 0.8504940270838341, "No": 0.14949705280868403}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940751923094719, "res": {"Yes": 0.9940751923094719, "No": 0.005923469130171635}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5148198733419186, "res": {"Yes": 0.5148198733419186, "No": 0.48516959439656027}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.02992881650492722, "res": {"No": 0.970069548083314, "Yes": 0.02992881650492722}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885882585755583, "res": {"Yes": 0.9885882585755583, "No": 0.011408643604013113}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.39607596710583787, "res": {"No": 0.6039229834632209, "Yes": 0.39607596710583787}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956385212944346, "res": {"Yes": 0.9956385212944346, "No": 0.004360964632963305}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9412191367942938, "res": {"Yes": 0.9412191367942938, "No": 0.05878054771408426}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9209579745562552, "res": {"Yes": 0.9209579745562552, "No": 0.07904155912225837}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874491136654177, "res": {"Yes": 0.9874491136654177, "No": 0.012550409304898891}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 3.984478607733403e-05, "res": {"No": 0.9999588011756949, "Yes": 3.984478607733403e-05}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992345020431709, "res": {"Yes": 0.9992345020431709, "No": 0.0007647019560394098}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912172962610395, "res": {"Yes": 0.9912172962610395, "No": 0.008782123887165637}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976061717875224, "res": {"Yes": 0.9976061717875224, "No": 0.0023921527170767515}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.998057309271136, "res": {"Yes": 0.998057309271136, "No": 0.0019418600045437127}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9676997165561054, "res": {"Yes": 0.9676997165561054, "No": 0.032293413046470425}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939201952674005, "res": {"Yes": 0.9939201952674005, "No": 0.006074455034709663}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970597943957302, "res": {"Yes": 0.9970597943957302, "No": 0.0029380282085504834}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990299335689488, "res": {"Yes": 0.9990299335689488, "No": 0.0009684468067118622}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9315746617300594, "res": {"Yes": 0.9315746617300594, "No": 0.06840546909642457}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.418052502066284e-05, "res": {"No": 0.999975250738268, "Yes": 2.418052502066284e-05}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9769843788126336, "res": {"Yes": 0.9769843788126336, "No": 0.02301471465519203}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9910469127236401, "res": {"Yes": 0.9910469127236401, "No": 0.008952839808359682}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970971132475112, "res": {"Yes": 0.9970971132475112, "No": 0.0029025111583698897}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8966909801617072, "res": {"Yes": 0.8966909801617072, "No": 0.10330691129627116}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07719922969849165, "res": {"No": 0.9227988157554063, "Yes": 0.07719922969849165}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9797626270037403, "res": {"Yes": 0.9797626270037403, "No": 0.02023682326887755}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953960886408026, "res": {"Yes": 0.9953960886408026, "No": 0.004603817630528017}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921808994559418, "res": {"Yes": 0.9921808994559418, "No": 0.007817980545801928}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9377759479203747, "res": {"Yes": 0.9377759479203747, "No": 0.06222362675473308}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.08464654384517052, "res": {"No": 0.915352580544731, "Yes": 0.08464654384517052}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967257044357848, "res": {"Yes": 0.9967257044357848, "No": 0.003274084470398342}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952960566591893, "res": {"Yes": 0.9952960566591893, "No": 0.004702714894060637}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975551624952806, "res": {"Yes": 0.9975551624952806, "No": 0.002443866164536899}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.981504883235355, "res": {"Yes": 0.981504883235355, "No": 0.018494196478094478}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.996912033905225, "res": {"Yes": 0.996912033905225, "No": 0.0030879445610766006}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965022813192465, "res": {"Yes": 0.9965022813192465, "No": 0.0034975683817761484}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962106858394018, "res": {"Yes": 0.9962106858394018, "No": 0.0037891407523914004}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898708845552935, "res": {"Yes": 0.9898708845552935, "No": 0.010128763817542194}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945423798179909, "res": {"Yes": 0.9945423798179909, "No": 0.005457540765167754}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.26824707541262677, "res": {"No": 0.7317486487591189, "Yes": 0.26824707541262677}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934075842494197, "res": {"Yes": 0.9934075842494197, "No": 0.006592067148925783}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946807081036954, "res": {"Yes": 0.9946807081036954, "No": 0.0053189604439246715}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990036904772809, "res": {"Yes": 0.990036904772809, "No": 0.009962603957892958}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9381345282035185, "res": {"Yes": 0.9381345282035185, "No": 0.06186464658582606}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7984964237876737, "res": {"Yes": 0.7984964237876737, "No": 0.20149987861550953}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9708455414220725, "res": {"Yes": 0.9708455414220725, "No": 0.02914960840694158}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9809226437609584, "res": {"Yes": 0.9809226437609584, "No": 0.019076421174995976}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9926083697663309, "res": {"Yes": 0.9926083697663309, "No": 0.007391388297710405}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989937790801193, "res": {"Yes": 0.9989937790801193, "No": 0.0010049243640047674}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9833810333513321, "res": {"Yes": 0.9833810333513321, "No": 0.016618462547517653}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9804588849140532, "res": {"Yes": 0.9804588849140532, "No": 0.01954072948807763}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9801524305845742, "res": {"Yes": 0.9801524305845742, "No": 0.0198471670483772}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9805305110322029, "res": {"Yes": 0.9805305110322029, "No": 0.01946886803928026}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.940890097050839, "res": {"Yes": 0.940890097050839, "No": 0.059109371318005026}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2920646199496271, "res": {"No": 0.7079341203294539, "Yes": 0.2920646199496271}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.13198470952990984, "res": {"No": 0.8680141349063993, "Yes": 0.13198470952990984}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857692580213167, "res": {"Yes": 0.9857692580213167, "No": 0.014230197327075905}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9825417170876748, "res": {"Yes": 0.9825417170876748, "No": 0.017457446287616217}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.933992226119929, "res": {"Yes": 0.933992226119929, "No": 0.06600672158132385}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.008716565354653127, "res": {"No": 0.9912807877446562, "Yes": 0.008716565354653127}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9822775589265219, "res": {"Yes": 0.9822775589265219, "No": 0.01772151217954729}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992229518583933, "res": {"Yes": 0.9992229518583933, "No": 0.0007762227732887449}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.986318403565281, "res": {"Yes": 0.986318403565281, "No": 0.013680716396413705}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9052500401355148, "res": {"Yes": 0.9052500401355148, "No": 0.09474746405204468}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5904214647930256, "res": {"Yes": 0.5904214647930256, "No": 0.4095772044515473}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873767047019939, "res": {"Yes": 0.9873767047019939, "No": 0.012622951176999075}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951228545858497, "res": {"Yes": 0.9951228545858497, "No": 0.00487682931898058}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6534091540773536, "res": {"Yes": 0.6534091540773536, "No": 0.34659036524335435}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.992396869642378, "res": {"Yes": 0.992396869642378, "No": 0.007602957216030952}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9265034310737066, "res": {"Yes": 0.9265034310737066, "No": 0.07349258957169873}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.737201159753977, "res": {"Yes": 0.737201159753977, "No": 0.2627980270026842}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9634912294224731, "res": {"Yes": 0.9634912294224731, "No": 0.03650768107946159}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9814872025656387, "res": {"Yes": 0.9814872025656387, "No": 0.018511718777658566}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.44779139104602333, "res": {"No": 0.5522044615102384, "Yes": 0.44779139104602333}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9936889546335743, "res": {"Yes": 0.9936889546335743, "No": 0.006310846243067441}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951843595569383, "res": {"Yes": 0.9951843595569383, "No": 0.004814814788482035}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999478320122291, "res": {"Yes": 0.999478320122291, "No": 0.0005212188744080646}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964409674237467, "res": {"Yes": 0.9964409674237467, "No": 0.003558273940034709}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936497581534132, "res": {"Yes": 0.9936497581534132, "No": 0.006349837861662378}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.871771117027706, "res": {"Yes": 0.871771117027706, "No": 0.12822840386843393}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8209999010938969, "res": {"Yes": 0.8209999010938969, "No": 0.17899883169612008}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.929089885254486, "res": {"Yes": 0.929089885254486, "No": 0.07090960373235973}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9288751881013899, "res": {"Yes": 0.9288751881013899, "No": 0.07112425934620392}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9310898288007684, "res": {"Yes": 0.9310898288007684, "No": 0.06890899899394377}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02419956793268893, "res": {"No": 0.9757993633886782, "Yes": 0.02419956793268893}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9650604636247845, "res": {"Yes": 0.9650604636247845, "No": 0.034938828630846956}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971540130212053, "res": {"Yes": 0.9971540130212053, "No": 0.002844353152989537}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939184326508891, "res": {"Yes": 0.9939184326508891, "No": 0.006080772713763047}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9162756881446859, "res": {"Yes": 0.9162756881446859, "No": 0.08372078541485531}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9966410341902459, "res": {"Yes": 0.9966410341902459, "No": 0.00335887897187474}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772483204107348, "res": {"Yes": 0.9772483204107348, "No": 0.022750822053914575}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983833773444459, "res": {"Yes": 0.9983833773444459, "No": 0.001616353973277306}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954277504045126, "res": {"Yes": 0.9954277504045126, "No": 0.004571115554660879}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938385843521712, "res": {"Yes": 0.9938385843521712, "No": 0.006160625280583026}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8666166645977328, "res": {"Yes": 0.8666166645977328, "No": 0.13338094457089114}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9465338704769695, "res": {"Yes": 0.9465338704769695, "No": 0.05346256444529264}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.988183915763253, "res": {"Yes": 0.988183915763253, "No": 0.011814102901535202}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9482659719010096, "res": {"Yes": 0.9482659719010096, "No": 0.05173046013867454}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.878269881904684, "res": {"Yes": 0.878269881904684, "No": 0.12172790645197866}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9788951835446204, "res": {"Yes": 0.9788951835446204, "No": 0.0211016319054562}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946499368388823, "res": {"Yes": 0.9946499368388823, "No": 0.005349254893588219}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9548735859463426, "res": {"Yes": 0.9548735859463426, "No": 0.045124280818331336}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9906062673756428, "res": {"Yes": 0.9906062673756428, "No": 0.009391248819340616}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9899693726987295, "res": {"Yes": 0.9899693726987295, "No": 0.010028151410767162}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8771830898859613, "res": {"Yes": 0.8771830898859613, "No": 0.12281578858433911}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9573957156673037, "res": {"Yes": 0.9573957156673037, "No": 0.04260376284128557}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.962527742364245, "res": {"Yes": 0.962527742364245, "No": 0.0374718985097297}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9920052523535909, "res": {"Yes": 0.9920052523535909, "No": 0.007994575738163588}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984633468623295, "res": {"Yes": 0.9984633468623295, "No": 0.0015360122024266943}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9106375740265592, "res": {"Yes": 0.9106375740265592, "No": 0.08936099406593477}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932880689104654, "res": {"Yes": 0.9932880689104654, "No": 0.006711335610777811}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875633829508131, "res": {"Yes": 0.9875633829508131, "No": 0.012436283379694733}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9824719866114145, "res": {"Yes": 0.9824719866114145, "No": 0.017526802469914365}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916452307049466, "res": {"Yes": 0.9916452307049466, "No": 0.008353794601951445}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.44175868832828463, "res": {"No": 0.5582287077807525, "Yes": 0.44175868832828463}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8893948511942916, "res": {"Yes": 0.8893948511942916, "No": 0.11059533166079188}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6819905483220712, "res": {"Yes": 0.6819905483220712, "No": 0.3179933526617009}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9166810098527075, "res": {"Yes": 0.9166810098527075, "No": 0.08330789136688595}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9109921214529728, "res": {"Yes": 0.9109921214529728, "No": 0.08900278656687217}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9669475711927349, "res": {"Yes": 0.9669475711927349, "No": 0.03305142490493632}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987524511369084, "res": {"Yes": 0.9987524511369084, "No": 0.0012470785787941324}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990449277720823, "res": {"Yes": 0.9990449277720823, "No": 0.0009546548465840055}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962570587329274, "res": {"Yes": 0.9962570587329274, "No": 0.0037419867201529205}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980954341916093, "res": {"Yes": 0.9980954341916093, "No": 0.001903552693686101}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.08585431764574815, "res": {"No": 0.9141450610506908, "Yes": 0.08585431764574815}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.26915552677616034, "res": {"No": 0.7308427928655757, "Yes": 0.26915552677616034}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8851152999773038, "res": {"Yes": 0.8851152999773038, "No": 0.1148841327309481}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9193384258078127, "res": {"Yes": 0.9193384258078127, "No": 0.08066099128013872}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.18924252224900795, "res": {"No": 0.8107553783669004, "Yes": 0.18924252224900795}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0031501298650323754, "res": {"No": 0.9968497113056041, "Yes": 0.0031501298650323754}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990612285918499, "res": {"Yes": 0.9990612285918499, "No": 0.000938096439760001}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.12156832847437421, "res": {"No": 0.8784312876076846, "Yes": 0.12156832847437421}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.962124685507997, "res": {"Yes": 0.962124685507997, "No": 0.037874750599726004}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9287062271402791, "res": {"Yes": 0.9287062271402791, "No": 0.07129303800628196}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.15591440628056047, "res": {"No": 0.8440783305365883, "Yes": 0.15591440628056047}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974734365477722, "res": {"Yes": 0.9974734365477722, "No": 0.0025243643260729432}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960771389017481, "res": {"Yes": 0.9960771389017481, "No": 0.003921249209708949}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963723152054685, "res": {"Yes": 0.9963723152054685, "No": 0.0036240539646660774}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.989085868073923, "res": {"Yes": 0.989085868073923, "No": 0.0109114410785525}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.5598238946431432e-05, "res": {"No": 0.9999734627301196, "Yes": 2.5598238946431432e-05}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963742045284439, "res": {"Yes": 0.9963742045284439, "No": 0.0036256510746604013}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991716565659968, "res": {"Yes": 0.9991716565659968, "No": 0.0008281295122912531}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905345748414464, "res": {"Yes": 0.9905345748414464, "No": 0.00946508702464692}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9406021939249595, "res": {"Yes": 0.9406021939249595, "No": 0.05939728266740005}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9941298599955763, "res": {"Yes": 0.9941298599955763, "No": 0.005869317036718135}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990694422676359, "res": {"Yes": 0.9990694422676359, "No": 0.0009302823646465902}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981251239799688, "res": {"Yes": 0.9981251239799688, "No": 0.0018745605881713362}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996509858352799, "res": {"Yes": 0.996509858352799, "No": 0.003489758649605284}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930464321141415, "res": {"Yes": 0.9930464321141415, "No": 0.006952329470171309}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00012137726140151065, "res": {"No": 0.9998741809658787, "Yes": 0.00012137726140151065}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6263960751812885, "res": {"Yes": 0.6263960751812885, "No": 0.37359898105500466}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8650989374812064, "res": {"Yes": 0.8650989374812064, "No": 0.13489839682668703}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9153747543142098, "res": {"Yes": 0.9153747543142098, "No": 0.08462243147979857}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6333415176335001, "res": {"Yes": 0.6333415176335001, "No": 0.3666548267745506}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5254652386232949, "res": {"Yes": 0.5254652386232949, "No": 0.47451912317764566}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9729002013910555, "res": {"Yes": 0.9729002013910555, "No": 0.0270980936117716}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9483207529119538, "res": {"Yes": 0.9483207529119538, "No": 0.05167683312119755}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.969242890995163, "res": {"Yes": 0.969242890995163, "No": 0.030755950231056373}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8417019971583388, "res": {"Yes": 0.8417019971583388, "No": 0.15829537962209542}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9996859172843064, "res": {"Yes": 0.9996859172843064, "No": 0.0003138287823232878}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987933518859924, "res": {"Yes": 0.9987933518859924, "No": 0.0012064055020537933}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996121817809341, "res": {"Yes": 0.9996121817809341, "No": 0.00038726033145172447}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990959746773342, "res": {"Yes": 0.9990959746773342, "No": 0.0009037019077636498}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981831971879076, "res": {"Yes": 0.9981831971879076, "No": 0.0018167356267802402}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9536847870778122, "res": {"Yes": 0.9536847870778122, "No": 0.04631407816820368}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9722381881542727, "res": {"Yes": 0.9722381881542727, "No": 0.027761092192701602}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9913390073749231, "res": {"Yes": 0.9913390073749231, "No": 0.008660732289450938}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821286363109432, "res": {"Yes": 0.9821286363109432, "No": 0.017871214304374414}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975414053068975, "res": {"Yes": 0.9975414053068975, "No": 0.002458250158075335}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9707861051649415, "res": {"Yes": 0.9707861051649415, "No": 0.029213393306118243}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.996945087669812, "res": {"Yes": 0.996945087669812, "No": 0.0030543768018980763}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990903808945049, "res": {"Yes": 0.9990903808945049, "No": 0.0009091883313892448}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983303803682299, "res": {"Yes": 0.9983303803682299, "No": 0.001669273628778729}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912779764763285, "res": {"Yes": 0.9912779764763285, "No": 0.008720765135620501}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.21077195752138178, "res": {"No": 0.7892217307899132, "Yes": 0.21077195752138178}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9565031948127354, "res": {"Yes": 0.9565031948127354, "No": 0.04349321195828784}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992286280120363, "res": {"Yes": 0.992286280120363, "No": 0.00771202098957762}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9543250159271627, "res": {"Yes": 0.9543250159271627, "No": 0.045668974801538406}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938938164785127, "res": {"Yes": 0.9938938164785127, "No": 0.006103978544477595}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.918468536703211, "res": {"Yes": 0.918468536703211, "No": 0.08153052410158014}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969335930589117, "res": {"Yes": 0.9969335930589117, "No": 0.0030658736789030165}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998274667799792, "res": {"Yes": 0.998274667799792, "No": 0.0017249607196076929}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.988082540211083, "res": {"Yes": 0.988082540211083, "No": 0.011916628515641313}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974787726459414, "res": {"Yes": 0.9974787726459414, "No": 0.0025207300929368163}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6930513391975931, "res": {"Yes": 0.6930513391975931, "No": 0.306944294192827}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993457976837098, "res": {"Yes": 0.9993457976837098, "No": 0.0006537206751227604}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968362198335396, "res": {"Yes": 0.9968362198335396, "No": 0.0031619827361432664}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960331414529199, "res": {"Yes": 0.9960331414529199, "No": 0.003965132527959754}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988971866460964, "res": {"Yes": 0.9988971866460964, "No": 0.0011011056143522442}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9948693415213902, "res": {"Yes": 0.9948693415213902, "No": 0.005129844151938582}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971169041265511, "res": {"Yes": 0.9971169041265511, "No": 0.0028828612166600085}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965787538454471, "res": {"Yes": 0.9965787538454471, "No": 0.0034203171441478998}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982902272296751, "res": {"Yes": 0.9982902272296751, "No": 0.001709018585217455}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964729141322023, "res": {"Yes": 0.9964729141322023, "No": 0.0035266308402181726}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8289896987830344, "res": {"Yes": 0.8289896987830344, "No": 0.17100840015030702}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875375772692189, "res": {"Yes": 0.9875375772692189, "No": 0.012461982115299482}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981027961706828, "res": {"Yes": 0.9981027961706828, "No": 0.001897165213185447}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984010772412079, "res": {"Yes": 0.9984010772412079, "No": 0.0015988494534508384}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9668229764227538, "res": {"Yes": 0.9668229764227538, "No": 0.033176005176718974}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.15594568584769603, "res": {"No": 0.8440512444975592, "Yes": 0.15594568584769603}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995835953604866, "res": {"Yes": 0.9995835953604866, "No": 0.00041554181954877553}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976434250927415, "res": {"Yes": 0.9976434250927415, "No": 0.0023550460442650118}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978291394335197, "res": {"Yes": 0.9978291394335197, "No": 0.0021697166413047046}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9937920818444949, "res": {"Yes": 0.9937920818444949, "No": 0.00620547693664866}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9631038335456228, "res": {"Yes": 0.9631038335456228, "No": 0.0368850116057148}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978868311916392, "res": {"Yes": 0.9978868311916392, "No": 0.002112410640463371}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984431428613845, "res": {"Yes": 0.9984431428613845, "No": 0.0015535354654600978}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9924444174085789, "res": {"Yes": 0.9924444174085789, "No": 0.007549738138236354}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933085332393827, "res": {"Yes": 0.9933085332393827, "No": 0.006686785590869585}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4649745342259164, "res": {"No": 0.5350231831683637, "Yes": 0.4649745342259164}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8816100243633934, "res": {"Yes": 0.8816100243633934, "No": 0.11838579954505918}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9831553703501933, "res": {"Yes": 0.9831553703501933, "No": 0.016844001156887323}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701541379555244, "res": {"Yes": 0.9701541379555244, "No": 0.029845112138665357}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9812843246129974, "res": {"Yes": 0.9812843246129974, "No": 0.018714847241789248}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9148031714647933, "res": {"Yes": 0.9148031714647933, "No": 0.08519356296969058}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918974480729073, "res": {"Yes": 0.9918974480729073, "No": 0.008102172851545488}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984572878057398, "res": {"Yes": 0.9984572878057398, "No": 0.0015415498076532372}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956908770282639, "res": {"Yes": 0.9956908770282639, "No": 0.004308971648374684}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9449659413852983, "res": {"Yes": 0.9449659413852983, "No": 0.055033213180517675}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.019228552408193746, "res": {"No": 0.9807664442676542, "Yes": 0.019228552408193746}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861204910197013, "res": {"Yes": 0.9861204910197013, "No": 0.013878588807523766}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929931780508393, "res": {"Yes": 0.9929931780508393, "No": 0.007006135596885313}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9878013176032089, "res": {"Yes": 0.9878013176032089, "No": 0.012198263176004838}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9753507432571631, "res": {"Yes": 0.9753507432571631, "No": 0.02464660786328408}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.828179931193645, "res": {"Yes": 0.828179931193645, "No": 0.17181714124348638}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.1842357668526906, "res": {"No": 0.8157628591835804, "Yes": 0.1842357668526906}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998219813671976, "res": {"Yes": 0.9998219813671976, "No": 0.00017794838884943015}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996887980203406, "res": {"Yes": 0.996887980203406, "No": 0.0031119460778037318}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963517148229917, "res": {"Yes": 0.9963517148229917, "No": 0.0036479332669075355}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7826962337907726, "res": {"Yes": 0.7826962337907726, "No": 0.21729965052098338}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7799192892289053, "res": {"Yes": 0.7799192892289053, "No": 0.22007530406356948}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9759123187021531, "res": {"Yes": 0.9759123187021531, "No": 0.02408438677721206}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934271140397692, "res": {"Yes": 0.9934271140397692, "No": 0.006570630398652295}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9273212365036342, "res": {"Yes": 0.9273212365036342, "No": 0.07267304743337119}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07503034854700978, "res": {"No": 0.9249688941325087, "Yes": 0.07503034854700978}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974578971298954, "res": {"Yes": 0.9974578971298954, "No": 0.0025415714707229123}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9918786870060543, "res": {"Yes": 0.9918786870060543, "No": 0.008118943858135849}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981880609472038, "res": {"Yes": 0.9981880609472038, "No": 0.0018111476026611722}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.959862743752504, "res": {"Yes": 0.959862743752504, "No": 0.040132243117132496}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9594064989152696, "res": {"Yes": 0.9594064989152696, "No": 0.04059251049328458}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9751511874053334, "res": {"Yes": 0.9751511874053334, "No": 0.024847816437961644}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9706529855402982, "res": {"Yes": 0.9706529855402982, "No": 0.02934518795450055}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9906218314082795, "res": {"Yes": 0.9906218314082795, "No": 0.009377543849369716}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5364832705337971, "res": {"Yes": 0.5364832705337971, "No": 0.46351601604468873}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.991466137900846, "res": {"Yes": 0.991466137900846, "No": 0.00852862607675251}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922648018201418, "res": {"Yes": 0.9922648018201418, "No": 0.00773244628564111}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985982906240363, "res": {"Yes": 0.985982906240363, "No": 0.014015015337539271}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956983258195771, "res": {"Yes": 0.9956983258195771, "No": 0.004299676894155106}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9405040133487723, "res": {"Yes": 0.9405040133487723, "No": 0.05948721877558509}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.973095331227446, "res": {"Yes": 0.973095331227446, "No": 0.02690409966724092}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9765649156448979, "res": {"Yes": 0.9765649156448979, "No": 0.023434504298923655}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950896829174052, "res": {"Yes": 0.9950896829174052, "No": 0.004908919610413876}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911747808893634, "res": {"Yes": 0.9911747808893634, "No": 0.008824127040315476}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995443110271833, "res": {"Yes": 0.995443110271833, "No": 0.0045552949379190875}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04079414960042923, "res": {"No": 0.9592048710878326, "Yes": 0.04079414960042923}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9807831890360985, "res": {"Yes": 0.9807831890360985, "No": 0.019213771335442727}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9604239888202821, "res": {"Yes": 0.9604239888202821, "No": 0.03957374047502947}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9654302104990782, "res": {"Yes": 0.9654302104990782, "No": 0.034566357379854064}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9813845492802901, "res": {"Yes": 0.9813845492802901, "No": 0.01861308781012329}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9700675235502798, "res": {"Yes": 0.9700675235502798, "No": 0.02993240518516324}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953919542717345, "res": {"Yes": 0.9953919542717345, "No": 0.004607978598601725}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9803974610890347, "res": {"Yes": 0.9803974610890347, "No": 0.019602215174984193}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9883526309459337, "res": {"Yes": 0.9883526309459337, "No": 0.011647135605198726}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.996572954770551, "res": {"Yes": 0.996572954770551, "No": 0.0034267886842946262}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6699911805781209, "res": {"Yes": 0.6699911805781209, "No": 0.33000756517884544}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862852302899735, "res": {"Yes": 0.9862852302899735, "No": 0.013713064327345977}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990790778093646, "res": {"Yes": 0.9990790778093646, "No": 0.0009199336592795058}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9889539713886031, "res": {"Yes": 0.9889539713886031, "No": 0.011044231759824465}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981139546230763, "res": {"Yes": 0.9981139546230763, "No": 0.001884542298561674}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9795168843723145, "res": {"Yes": 0.9795168843723145, "No": 0.020482184898584915}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9490239173526123, "res": {"Yes": 0.9490239173526123, "No": 0.05097538040345887}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960142179990293, "res": {"Yes": 0.9960142179990293, "No": 0.0039855139656002556}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972412558477911, "res": {"Yes": 0.9972412558477911, "No": 0.0027581337651709646}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942527525793049, "res": {"Yes": 0.9942527525793049, "No": 0.005746477618319442}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0009786054597315428, "res": {"No": 0.9990206630637418, "Yes": 0.0009786054597315428}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925786568765493, "res": {"Yes": 0.9925786568765493, "No": 0.007420858705461642}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9858511645555795, "res": {"Yes": 0.9858511645555795, "No": 0.01414791239074645}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8765915154274068, "res": {"Yes": 0.8765915154274068, "No": 0.12340714105745447}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9283547458567692, "res": {"Yes": 0.9283547458567692, "No": 0.07164386162828221}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9280196424377062, "res": {"Yes": 0.9280196424377062, "No": 0.07197651454871341}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.980302254412674, "res": {"Yes": 0.980302254412674, "No": 0.019696853043172938}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968667680485137, "res": {"Yes": 0.9968667680485137, "No": 0.0031322098537035236}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986654128545729, "res": {"Yes": 0.9986654128545729, "No": 0.0013333349420414447}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971324346424139, "res": {"Yes": 0.9971324346424139, "No": 0.0028664252014237103}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.36988338451835057, "res": {"No": 0.6301127672370482, "Yes": 0.36988338451835057}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9471234796104394, "res": {"Yes": 0.9471234796104394, "No": 0.05287483828817716}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9179363649416603, "res": {"Yes": 0.9179363649416603, "No": 0.08206160773265132}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885143890779198, "res": {"Yes": 0.9885143890779198, "No": 0.011484120774685682}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7382120463310096, "res": {"Yes": 0.7382120463310096, "No": 0.26178068654998793}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8334806436802976, "res": {"Yes": 0.8334806436802976, "No": 0.16651657915738885}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961705720321137, "res": {"Yes": 0.9961705720321137, "No": 0.00382913633966592}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883280548749682, "res": {"Yes": 0.9883280548749682, "No": 0.011669796158198744}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925663175131604, "res": {"Yes": 0.9925663175131604, "No": 0.007432608538273942}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938118635721412, "res": {"Yes": 0.9938118635721412, "No": 0.006184828511734013}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9844298971399713, "res": {"Yes": 0.9844298971399713, "No": 0.015567793780453027}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981937688024932, "res": {"Yes": 0.9981937688024932, "No": 0.001805377441848868}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989190669294863, "res": {"Yes": 0.9989190669294863, "No": 0.0010802316177066032}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9839032853562246, "res": {"Yes": 0.9839032853562246, "No": 0.016092681106617608}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962209772470494, "res": {"Yes": 0.9962209772470494, "No": 0.0037780529297836212}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9765873153712583, "res": {"Yes": 0.9765873153712583, "No": 0.023412256773762734}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971764128419651, "res": {"Yes": 0.9971764128419651, "No": 0.0028235734934183494}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990022231107235, "res": {"Yes": 0.9990022231107235, "No": 0.0009977094064046513}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986328392169774, "res": {"Yes": 0.9986328392169774, "No": 0.0013671089832796074}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970205643543175, "res": {"Yes": 0.9970205643543175, "No": 0.0029791045803783537}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.016483996134594208, "res": {"No": 0.9835136197528024, "Yes": 0.016483996134594208}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755524237240408, "res": {"Yes": 0.9755524237240408, "No": 0.02444596517980406}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8607898508616618, "res": {"Yes": 0.8607898508616618, "No": 0.1392086489152695}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7908833054293555, "res": {"Yes": 0.7908833054293555, "No": 0.20911521729338645}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9416823277208689, "res": {"Yes": 0.9416823277208689, "No": 0.05831487922037858}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9938098606457297, "res": {"Yes": 0.9938098606457297, "No": 0.00618729365704352}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855695830821231, "res": {"Yes": 0.9855695830821231, "No": 0.014429108342963157}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9848905999686605, "res": {"Yes": 0.9848905999686605, "No": 0.01510833047799959}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976024926227453, "res": {"Yes": 0.9976024926227453, "No": 0.002393202145381267}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984556214817627, "res": {"Yes": 0.9984556214817627, "No": 0.0015425243750625372}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.19336041429104864, "res": {"No": 0.8066370693992626, "Yes": 0.19336041429104864}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9716440512576499, "res": {"Yes": 0.9716440512576499, "No": 0.02835410392572947}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9775423697980916, "res": {"Yes": 0.9775423697980916, "No": 0.022457038387708204}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9831301379914089, "res": {"Yes": 0.9831301379914089, "No": 0.016869488550250264}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8957792223471337, "res": {"Yes": 0.8957792223471337, "No": 0.10421939860033654}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9554227588795725, "res": {"Yes": 0.9554227588795725, "No": 0.044575731715013604}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6227516657587427, "res": {"Yes": 0.6227516657587427, "No": 0.3772456772430855}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9864775386564161, "res": {"Yes": 0.9864775386564161, "No": 0.013519795962665782}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9259302380990488, "res": {"Yes": 0.9259302380990488, "No": 0.0740669696328917}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5920650608809317, "res": {"Yes": 0.5920650608809317, "No": 0.4079333115585896}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9996942589483242, "res": {"Yes": 0.9996942589483242, "No": 0.0003052858521834367}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986901415144835, "res": {"Yes": 0.9986901415144835, "No": 0.0013097950557505167}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995327362317332, "res": {"Yes": 0.9995327362317332, "No": 0.0004671823058126331}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996923758656603, "res": {"Yes": 0.996923758656603, "No": 0.003076200645837688}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990718168287361, "res": {"Yes": 0.9990718168287361, "No": 0.0009270500523045444}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9645085355120308, "res": {"Yes": 0.9645085355120308, "No": 0.03549025241726689}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.995010127980972, "res": {"Yes": 0.995010127980972, "No": 0.004988982489155779}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992868689289159, "res": {"Yes": 0.9992868689289159, "No": 0.0007127498789547415}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986525780891632, "res": {"Yes": 0.9986525780891632, "No": 0.0013469466350102429}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975019040478905, "res": {"Yes": 0.9975019040478905, "No": 0.0024975775421890462}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02226932740522241, "res": {"No": 0.9777275191915014, "Yes": 0.02226932740522241}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924971370592648, "res": {"Yes": 0.9924971370592648, "No": 0.007502679916982016}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967780532463983, "res": {"Yes": 0.9967780532463983, "No": 0.0032219081557095896}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872172828948607, "res": {"Yes": 0.9872172828948607, "No": 0.012782400693272614}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969181918498772, "res": {"Yes": 0.9969181918498772, "No": 0.0030816093568626236}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.18650315931778597, "res": {"No": 0.8134959425349154, "Yes": 0.18650315931778597}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992497375876955, "res": {"Yes": 0.9992497375876955, "No": 0.0007499884886637802}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999605891586276, "res": {"Yes": 0.9999605891586276, "No": 3.923626936635252e-05}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993647278708389, "res": {"Yes": 0.9993647278708389, "No": 0.0006351877077484526}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982749056886736, "res": {"Yes": 0.9982749056886736, "No": 0.0017250064316724442}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3048508312137741, "res": {"No": 0.6951403787502636, "Yes": 0.3048508312137741}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.009270184469313953, "res": {"No": 0.9907260745795501, "Yes": 0.009270184469313953}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9486953851391067, "res": {"Yes": 0.9486953851391067, "No": 0.05130354119733506}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5891660597543291, "res": {"Yes": 0.5891660597543291, "No": 0.41083064720425383}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9837835132447851, "res": {"Yes": 0.9837835132447851, "No": 0.016213400294413996}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8312980700936473, "res": {"Yes": 0.8312980700936473, "No": 0.1687004199673999}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9360259430077772, "res": {"Yes": 0.9360259430077772, "No": 0.06397107185959997}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8871093043577531, "res": {"Yes": 0.8871093043577531, "No": 0.1128887566035006}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9268284313457691, "res": {"Yes": 0.9268284313457691, "No": 0.0731692008479313}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9021573125217828, "res": {"Yes": 0.9021573125217828, "No": 0.09783971505151402}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.34666339952720276, "res": {"No": 0.6533349636801477, "Yes": 0.34666339952720276}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9746989801124403, "res": {"Yes": 0.9746989801124403, "No": 0.025299298775909033}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9825375766656028, "res": {"Yes": 0.9825375766656028, "No": 0.017461968351786754}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979768142050651, "res": {"Yes": 0.9979768142050651, "No": 0.002022983858135777}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9335439240756855, "res": {"Yes": 0.9335439240756855, "No": 0.06645571453523441}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9926189314733576, "res": {"Yes": 0.9926189314733576, "No": 0.007379296279482596}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982440178438879, "res": {"Yes": 0.9982440178438879, "No": 0.001755227676989905}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9901404383450997, "res": {"Yes": 0.9901404383450997, "No": 0.009858127865979513}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944293141365066, "res": {"Yes": 0.9944293141365066, "No": 0.005569881499749749}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9915732537565216, "res": {"Yes": 0.9915732537565216, "No": 0.008425352195477088}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8513544220217016, "res": {"Yes": 0.8513544220217016, "No": 0.1486423937054387}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.988533386529995, "res": {"Yes": 0.988533386529995, "No": 0.011466214926404851}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9364382746015436, "res": {"Yes": 0.9364382746015436, "No": 0.06356106383255662}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9754299088621536, "res": {"Yes": 0.9754299088621536, "No": 0.0245695624723688}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7997660512655448, "res": {"Yes": 0.7997660512655448, "No": 0.20022907357348982}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9971919441849819, "res": {"Yes": 0.9971919441849819, "No": 0.00280705749244668}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983486780349544, "res": {"Yes": 0.9983486780349544, "No": 0.0016509891266226916}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995492906000109, "res": {"Yes": 0.9995492906000109, "No": 0.0004504146297425067}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988742280572225, "res": {"Yes": 0.9988742280572225, "No": 0.0011257050821006495}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994131811055954, "res": {"Yes": 0.9994131811055954, "No": 0.0005864221706736757}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9064591891893081, "res": {"Yes": 0.9064591891893081, "No": 0.0935396844605907}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6289817921487263, "res": {"Yes": 0.6289817921487263, "No": 0.3710139930388961}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7807707626194615, "res": {"Yes": 0.7807707626194615, "No": 0.21922821809682713}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7713327933204523, "res": {"Yes": 0.7713327933204523, "No": 0.22866665292937108}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8021411332313456, "res": {"Yes": 0.8021411332313456, "No": 0.19785734257741164}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9977975737077686, "res": {"Yes": 0.9977975737077686, "No": 0.002201597600753216}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998205511928636, "res": {"Yes": 0.9998205511928636, "No": 0.00017873165697097047}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998894299073233, "res": {"Yes": 0.9998894299073233, "No": 0.00011020139527808264}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998601168145652, "res": {"Yes": 0.9998601168145652, "No": 0.0001394129731171439}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994330693159652, "res": {"Yes": 0.9994330693159652, "No": 0.0005661472242889764}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 4.619953120500142e-06, "res": {"No": 0.9999944422379444, "Yes": 4.619953120500142e-06}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.953721875646661, "res": {"Yes": 0.953721875646661, "No": 0.04627721294759394}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765155788785362, "res": {"Yes": 0.9765155788785362, "No": 0.02348325417832558}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898481187766157, "res": {"Yes": 0.9898481187766157, "No": 0.010151119337683066}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9889149193562948, "res": {"Yes": 0.9889149193562948, "No": 0.011084355269855413}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9212996525476636, "res": {"Yes": 0.9212996525476636, "No": 0.07869894728111677}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9750048662218262, "res": {"Yes": 0.9750048662218262, "No": 0.024992464988604555}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8395847454761193, "res": {"Yes": 0.8395847454761193, "No": 0.1604118513591804}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.980569827171778, "res": {"Yes": 0.980569827171778, "No": 0.019429137630105904}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9562142692783946, "res": {"Yes": 0.9562142692783946, "No": 0.04378499938735806}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9286081562976387, "res": {"Yes": 0.9286081562976387, "No": 0.07139165451581596}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.983662620566076, "res": {"Yes": 0.983662620566076, "No": 0.016337181422375616}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985977776337792, "res": {"Yes": 0.9985977776337792, "No": 0.0014020016688993666}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991473741321788, "res": {"Yes": 0.9991473741321788, "No": 0.00085222183100049}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953242813663673, "res": {"Yes": 0.9953242813663673, "No": 0.004675622359851133}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5683029726026849, "res": {"Yes": 0.5683029726026849, "No": 0.4316960613687857}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.971981458811932, "res": {"Yes": 0.971981458811932, "No": 0.028018210645949608}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9802200075630491, "res": {"Yes": 0.9802200075630491, "No": 0.019779678632748824}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894518441259877, "res": {"Yes": 0.9894518441259877, "No": 0.010547884083297026}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8354164800307341, "res": {"Yes": 0.8354164800307341, "No": 0.16458296325741253}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9874150605887483, "res": {"Yes": 0.9874150605887483, "No": 0.012584684754667685}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963952767704077, "res": {"Yes": 0.9963952767704077, "No": 0.0036045187943432424}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946229243037752, "res": {"Yes": 0.9946229243037752, "No": 0.005376154163103051}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979309871639999, "res": {"Yes": 0.9979309871639999, "No": 0.002068682391952738}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961114233711977, "res": {"Yes": 0.9961114233711977, "No": 0.003888195930525438}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9917671629651063, "res": {"Yes": 0.9917671629651063, "No": 0.008231413741544374}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9775124115161524, "res": {"Yes": 0.9775124115161524, "No": 0.022483697228949055}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765075070341221, "res": {"Yes": 0.9765075070341221, "No": 0.02349094150538059}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9930568949064681, "res": {"Yes": 0.9930568949064681, "No": 0.006940530961286357}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9869891440947248, "res": {"Yes": 0.9869891440947248, "No": 0.013008750923780059}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.17599301011294768, "res": {"No": 0.8240044406078416, "Yes": 0.17599301011294768}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986715959108841, "res": {"Yes": 0.9986715959108841, "No": 0.0013283124194316913}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989910401426298, "res": {"Yes": 0.9989910401426298, "No": 0.0010088199475159372}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991332193715434, "res": {"Yes": 0.9991332193715434, "No": 0.000866736975791561}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9629696151175698, "res": {"Yes": 0.9629696151175698, "No": 0.03702888482340887}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.37915588211624024, "res": {"No": 0.620832835050547, "Yes": 0.37915588211624024}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7305992946944101, "res": {"Yes": 0.7305992946944101, "No": 0.2693934846413696}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8630959173365128, "res": {"Yes": 0.8630959173365128, "No": 0.13689436622747575}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5808231833685611, "res": {"Yes": 0.5808231833685611, "No": 0.4191723278342333}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4964876350681876, "res": {"No": 0.5035065288379714, "Yes": 0.4964876350681876}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9854246440508978, "res": {"Yes": 0.9854246440508978, "No": 0.014571175245748393}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8564874362803323, "res": {"Yes": 0.8564874362803323, "No": 0.1435101233898501}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9891890825911053, "res": {"Yes": 0.9891890825911053, "No": 0.010809823229093666}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9678616165834384, "res": {"Yes": 0.9678616165834384, "No": 0.03213510926429161}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5468077723428033, "res": {"Yes": 0.5468077723428033, "No": 0.45318988914296704}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5926657489683217, "res": {"Yes": 0.5926657489683217, "No": 0.4073328151725492}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7412205645612512, "res": {"Yes": 0.7412205645612512, "No": 0.2587754194001713}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8843429580386023, "res": {"Yes": 0.8843429580386023, "No": 0.11565502976310946}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.976059141115155, "res": {"Yes": 0.976059141115155, "No": 0.023938697193174742}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9277670605900502, "res": {"Yes": 0.9277670605900502, "No": 0.07223193628973978}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 2.384767537313613e-07, "res": {"No": 0.9999994487765019, "Yes": 2.384767537313613e-07}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940619974405712, "res": {"Yes": 0.9940619974405712, "No": 0.0059379162904697426}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915761868345442, "res": {"Yes": 0.9915761868345442, "No": 0.008423515668887143}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985392829871431, "res": {"Yes": 0.9985392829871431, "No": 0.0014606043362039702}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985698336639972, "res": {"Yes": 0.9985698336639972, "No": 0.0014299139662239087}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8378478820313251, "res": {"Yes": 0.8378478820313251, "No": 0.16214995885100678}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896918558708816, "res": {"Yes": 0.9896918558708816, "No": 0.010307499931797528}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9344299170779218, "res": {"Yes": 0.9344299170779218, "No": 0.06556849221594838}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.973400990515585, "res": {"Yes": 0.973400990515585, "No": 0.026598299335341382}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9105799543464157, "res": {"Yes": 0.9105799543464157, "No": 0.0894187667436294}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.938343145608045, "res": {"Yes": 0.938343145608045, "No": 0.061649139949769595}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996744810427962, "res": {"Yes": 0.9996744810427962, "No": 0.00032540662624602144}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996465463631724, "res": {"Yes": 0.996465463631724, "No": 0.0035339837616063983}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987629170188039, "res": {"Yes": 0.9987629170188039, "No": 0.0012363675420901344}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910027645491358, "res": {"Yes": 0.9910027645491358, "No": 0.008996674229450783}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5324158617426115, "res": {"Yes": 0.5324158617426115, "No": 0.4675827876728631}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.32438383124969616, "res": {"No": 0.6756150755393587, "Yes": 0.32438383124969616}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9805582506327348, "res": {"Yes": 0.9805582506327348, "No": 0.01944136240196294}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9835220337478102, "res": {"Yes": 0.9835220337478102, "No": 0.0164775999369568}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6724301762729297, "res": {"Yes": 0.6724301762729297, "No": 0.327568790053012}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8473944671652358, "res": {"Yes": 0.8473944671652358, "No": 0.15260382422402727}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9508188198423448, "res": {"Yes": 0.9508188198423448, "No": 0.04917750379445686}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.90901015340914, "res": {"Yes": 0.90901015340914, "No": 0.09098736757122587}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9277891194419174, "res": {"Yes": 0.9277891194419174, "No": 0.07220996667646805}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9508124645905922, "res": {"Yes": 0.9508124645905922, "No": 0.04918636637913554}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9788178519220466, "res": {"Yes": 0.9788178519220466, "No": 0.02118117210840325}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991096598282272, "res": {"Yes": 0.9991096598282272, "No": 0.0008900806334143455}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962956356489897, "res": {"Yes": 0.9962956356489897, "No": 0.003703985044012342}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985644623264494, "res": {"Yes": 0.985644623264494, "No": 0.014354927728629705}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995606062817066, "res": {"Yes": 0.9995606062817066, "No": 0.00043912049784056823}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9763846398484172, "res": {"Yes": 0.9763846398484172, "No": 0.02361493802567957}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8999450077721928, "res": {"Yes": 0.8999450077721928, "No": 0.10005427402242516}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8398145544748902, "res": {"Yes": 0.8398145544748902, "No": 0.16018361948204463}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9101786181964592, "res": {"Yes": 0.9101786181964592, "No": 0.08981753998267944}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6677265336687938, "res": {"Yes": 0.6677265336687938, "No": 0.33227175690375127}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9275922486364522, "res": {"Yes": 0.9275922486364522, "No": 0.07240644240896697}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971092111998886, "res": {"Yes": 0.9971092111998886, "No": 0.0028900513755756784}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996903264587523, "res": {"Yes": 0.9996903264587523, "No": 0.00030915802624402846}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999216050778884, "res": {"Yes": 0.999216050778884, "No": 0.0007830844604801831}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974569458547525, "res": {"Yes": 0.9974569458547525, "No": 0.002542829097177734}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1930811137253073, "res": {"No": 0.8069174164205803, "Yes": 0.1930811137253073}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9611953238885484, "res": {"Yes": 0.9611953238885484, "No": 0.03880438590316269}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9370093951059619, "res": {"Yes": 0.9370093951059619, "No": 0.0629894390984867}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970013659111735, "res": {"Yes": 0.9970013659111735, "No": 0.00299815553818116}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8989402135530401, "res": {"Yes": 0.8989402135530401, "No": 0.10105894687119023}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.43913210000435526, "res": {"No": 0.5608556022974441, "Yes": 0.43913210000435526}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956667669642941, "res": {"Yes": 0.9956667669642941, "No": 0.004331842100799321}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9712624576240393, "res": {"Yes": 0.9712624576240393, "No": 0.028736743894504934}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701594029963178, "res": {"Yes": 0.9701594029963178, "No": 0.029839809133424287}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9812790462988114, "res": {"Yes": 0.9812790462988114, "No": 0.018719670679396703}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7806590032841578, "res": {"Yes": 0.7806590032841578, "No": 0.21933985559757482}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9642877876670013, "res": {"Yes": 0.9642877876670013, "No": 0.03571161832015784}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9826740802670738, "res": {"Yes": 0.9826740802670738, "No": 0.017325244871654884}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9079905027154752, "res": {"Yes": 0.9079905027154752, "No": 0.09200854737487256}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948792593238036, "res": {"Yes": 0.9948792593238036, "No": 0.005119436011510366}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9210227920174233, "res": {"Yes": 0.9210227920174233, "No": 0.07897630564871036}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962809640086002, "res": {"Yes": 0.9962809640086002, "No": 0.00371859637666602}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984356556641734, "res": {"Yes": 0.9984356556641734, "No": 0.0015643055366878205}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970366843131153, "res": {"Yes": 0.9970366843131153, "No": 0.0029630021048884193}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998199552799963, "res": {"Yes": 0.9998199552799963, "No": 0.0001794641664086581}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.11844094465144868, "res": {"No": 0.8815565739679815, "Yes": 0.11844094465144868}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9818075769315029, "res": {"Yes": 0.9818075769315029, "No": 0.018191069356995365}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987116673140363, "res": {"Yes": 0.9987116673140363, "No": 0.001287147177338537}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877625777830072, "res": {"Yes": 0.9877625777830072, "No": 0.012237214516228424}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.986647507100406, "res": {"Yes": 0.986647507100406, "No": 0.013352355069418087}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8350065164750478, "res": {"Yes": 0.8350065164750478, "No": 0.16499241101298556}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9025066322938983, "res": {"Yes": 0.9025066322938983, "No": 0.09749226865193593}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948802043600609, "res": {"Yes": 0.9948802043600609, "No": 0.005119302907904421}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9594960473808813, "res": {"Yes": 0.9594960473808813, "No": 0.04050349549805764}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9833487504652986, "res": {"Yes": 0.9833487504652986, "No": 0.016650660394648527}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9731562907476037, "res": {"Yes": 0.9731562907476037, "No": 0.026842736930558886}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927636583194432, "res": {"Yes": 0.9927636583194432, "No": 0.007235882137005517}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964258149600971, "res": {"Yes": 0.9964258149600971, "No": 0.0035741496187949432}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894967751649066, "res": {"Yes": 0.9894967751649066, "No": 0.010502516330567733}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9532861843832787, "res": {"Yes": 0.9532861843832787, "No": 0.04671231708217424}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8607725491595422, "res": {"Yes": 0.8607725491595422, "No": 0.13922390701931944}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940389185710887, "res": {"Yes": 0.9940389185710887, "No": 0.00596101685024687}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9610747350822323, "res": {"Yes": 0.9610747350822323, "No": 0.03892478829770197}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993518066952102, "res": {"Yes": 0.993518066952102, "No": 0.006477552762872569}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9744691264137118, "res": {"Yes": 0.9744691264137118, "No": 0.02552538070646328}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7771184460605135, "res": {"Yes": 0.7771184460605135, "No": 0.22288101833004534}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965891787116847, "res": {"Yes": 0.9965891787116847, "No": 0.0034106206344838904}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873304511362911, "res": {"Yes": 0.9873304511362911, "No": 0.012668857023061775}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884540599087491, "res": {"Yes": 0.9884540599087491, "No": 0.011545350236460595}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9798746761251551, "res": {"Yes": 0.9798746761251551, "No": 0.02012503734554454}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09495187449518164, "res": {"No": 0.9050462152603885, "Yes": 0.09495187449518164}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9818392986649519, "res": {"Yes": 0.9818392986649519, "No": 0.01816031972430575}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970766204202639, "res": {"Yes": 0.9970766204202639, "No": 0.002922606277528611}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9691476035953067, "res": {"Yes": 0.9691476035953067, "No": 0.030851743957549634}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9594336015722671, "res": {"Yes": 0.9594336015722671, "No": 0.040565797237259775}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0015290540542172962, "res": {"No": 0.9984701199614157, "Yes": 0.0015290540542172962}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9438941550601009, "res": {"Yes": 0.9438941550601009, "No": 0.05610419455484462}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8705251924377041, "res": {"Yes": 0.8705251924377041, "No": 0.12947250978882838}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9295906565299784, "res": {"Yes": 0.9295906565299784, "No": 0.07040805533441714}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9866938164879089, "res": {"Yes": 0.9866938164879089, "No": 0.013304832019481226}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.001914100077927243, "res": {"No": 0.998085689234601, "Yes": 0.001914100077927243}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.004559868926762469, "res": {"No": 0.9954397990353783, "Yes": 0.004559868926762469}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40205036988133713, "res": {"No": 0.5979482902046739, "Yes": 0.40205036988133713}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.059430922931506404, "res": {"No": 0.9405679161838019, "Yes": 0.059430922931506404}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.018116495538945763, "res": {"No": 0.9818826262254957, "Yes": 0.018116495538945763}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8872168276340572, "res": {"Yes": 0.8872168276340572, "No": 0.1127819015548484}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7213409273426085, "res": {"Yes": 0.7213409273426085, "No": 0.27865495025191117}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6733564272143903, "res": {"Yes": 0.6733564272143903, "No": 0.32664023909103507}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.447326466872429, "res": {"No": 0.5526695305678873, "Yes": 0.447326466872429}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.35531384842674285, "res": {"No": 0.6446800704813337, "Yes": 0.35531384842674285}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3238370570600821, "res": {"No": 0.676161017320299, "Yes": 0.3238370570600821}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9696677353902644, "res": {"Yes": 0.9696677353902644, "No": 0.030331130034403235}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46045103454701897, "res": {"No": 0.5395470973218234, "Yes": 0.46045103454701897}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37697258990485066, "res": {"No": 0.6230253442767897, "Yes": 0.37697258990485066}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827033250828633, "res": {"Yes": 0.9827033250828633, "No": 0.017295718390295662}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.026228748468119595, "res": {"No": 0.9737698138690867, "Yes": 0.026228748468119595}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862712142901569, "res": {"Yes": 0.9862712142901569, "No": 0.013728463138867329}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961360337036461, "res": {"Yes": 0.9961360337036461, "No": 0.0038630996485213716}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969396355914247, "res": {"Yes": 0.9969396355914247, "No": 0.003059280259492725}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9599884076094369, "res": {"Yes": 0.9599884076094369, "No": 0.040010014248083405}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.903736945885686, "res": {"Yes": 0.903736945885686, "No": 0.09625951052946098}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945083044956484, "res": {"Yes": 0.9945083044956484, "No": 0.0054908912281308895}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9858115306850008, "res": {"Yes": 0.9858115306850008, "No": 0.01418760759472655}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8804363694851771, "res": {"Yes": 0.8804363694851771, "No": 0.11955977780588525}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9803186373608453, "res": {"Yes": 0.9803186373608453, "No": 0.019679381808915587}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.012946743365887666, "res": {"No": 0.9870512784975357, "Yes": 0.012946743365887666}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992999610416841, "res": {"Yes": 0.9992999610416841, "No": 0.0006993704393192184}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9842359737089905, "res": {"Yes": 0.9842359737089905, "No": 0.01576262574885754}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931371918671771, "res": {"Yes": 0.9931371918671771, "No": 0.006862112088569276}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9521952344620458, "res": {"Yes": 0.9521952344620458, "No": 0.04780325239801443}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07195076595850536, "res": {"No": 0.9280487919924665, "Yes": 0.07195076595850536}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.984018697092636, "res": {"Yes": 0.984018697092636, "No": 0.015980900316892335}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985996712765404, "res": {"Yes": 0.9985996712765404, "No": 0.0013995890796226523}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974098677959059, "res": {"Yes": 0.9974098677959059, "No": 0.0025899829145274436}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941576701732591, "res": {"Yes": 0.9941576701732591, "No": 0.005839932736096655}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3677897998748645, "res": {"No": 0.6321978563402662, "Yes": 0.3677897998748645}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.817315851245914, "res": {"Yes": 0.817315851245914, "No": 0.18268323175933}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988318877748422, "res": {"Yes": 0.9988318877748422, "No": 0.0011677511193847337}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904797116413034, "res": {"Yes": 0.9904797116413034, "No": 0.00951967470886038}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9839019000213741, "res": {"Yes": 0.9839019000213741, "No": 0.01609726334517937}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8616873216740396, "res": {"Yes": 0.8616873216740396, "No": 0.13830571748325046}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8684735487597096, "res": {"Yes": 0.8684735487597096, "No": 0.13152517510793318}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9776349865493925, "res": {"Yes": 0.9776349865493925, "No": 0.02236213393216523}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952765756215383, "res": {"Yes": 0.9952765756215383, "No": 0.004721136615526961}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5451717480099281, "res": {"Yes": 0.5451717480099281, "No": 0.45482565922350066}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9692337142469138, "res": {"Yes": 0.9692337142469138, "No": 0.03076140682061255}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9911216186733846, "res": {"Yes": 0.9911216186733846, "No": 0.008875168126225793}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832580407271192, "res": {"Yes": 0.9832580407271192, "No": 0.01673853052553496}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971928879278845, "res": {"Yes": 0.9971928879278845, "No": 0.0028040849899946692}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924887947588892, "res": {"Yes": 0.9924887947588892, "No": 0.007509858912503061}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.988727147186746, "res": {"Yes": 0.988727147186746, "No": 0.011272501664599322}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986322475271955, "res": {"Yes": 0.9986322475271955, "No": 0.001367541057982531}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991739195924446, "res": {"Yes": 0.9991739195924446, "No": 0.0008256237472699699}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999628860289388, "res": {"Yes": 0.999628860289388, "No": 0.0003709081871399107}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981307013191191, "res": {"Yes": 0.9981307013191191, "No": 0.0018687098612233132}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.420029324459655, "res": {"No": 0.5799694553604868, "Yes": 0.420029324459655}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.3912097178500601, "res": {"No": 0.6087880812948916, "Yes": 0.3912097178500601}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9820455046046527, "res": {"Yes": 0.9820455046046527, "No": 0.017954365325382474}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9429769008837199, "res": {"Yes": 0.9429769008837199, "No": 0.05702209241566466}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.23957077363536422, "res": {"No": 0.7604283393950012, "Yes": 0.23957077363536422}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.009093670108214002, "res": {"No": 0.9909051377478555, "Yes": 0.009093670108214002}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.957131364470603, "res": {"Yes": 0.957131364470603, "No": 0.04286698198845612}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8916715960990838, "res": {"Yes": 0.8916715960990838, "No": 0.10832730857725582}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6275395889538447, "res": {"Yes": 0.6275395889538447, "No": 0.3724593219292826}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.866161732303971, "res": {"Yes": 0.866161732303971, "No": 0.1338367640962281}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8711904929628557, "res": {"Yes": 0.8711904929628557, "No": 0.12880856101667282}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9010610190239158, "res": {"Yes": 0.9010610190239158, "No": 0.09893798888512378}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9132063452338011, "res": {"Yes": 0.9132063452338011, "No": 0.08679302653515146}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929314733860144, "res": {"Yes": 0.9929314733860144, "No": 0.007067950697512442}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9892630450454104, "res": {"Yes": 0.9892630450454104, "No": 0.01073530538988149}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9604261853124563, "res": {"Yes": 0.9604261853124563, "No": 0.03957277884481978}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9464952110370721, "res": {"Yes": 0.9464952110370721, "No": 0.053503634115211814}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912252627064627, "res": {"Yes": 0.9912252627064627, "No": 0.008774360410315322}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950814195279707, "res": {"Yes": 0.9950814195279707, "No": 0.004918326247275972}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5347077730050762, "res": {"Yes": 0.5347077730050762, "No": 0.4652839893636664}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9656907216079206, "res": {"Yes": 0.9656907216079206, "No": 0.03430792849719224}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831123916694579, "res": {"Yes": 0.9831123916694579, "No": 0.016886564048000645}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974848220737978, "res": {"Yes": 0.9974848220737978, "No": 0.002514637301363199}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9699089353250612, "res": {"Yes": 0.9699089353250612, "No": 0.03008834173585138}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9802610110235468, "res": {"Yes": 0.9802610110235468, "No": 0.019737393405952767}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4020522997277442, "res": {"No": 0.5979465860544751, "Yes": 0.4020522997277442}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945085414870056, "res": {"Yes": 0.9945085414870056, "No": 0.0054886936642440255}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985946056803835, "res": {"Yes": 0.985946056803835, "No": 0.014053685562302883}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9888460309757813, "res": {"Yes": 0.9888460309757813, "No": 0.01115373678002468}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972438677257847, "res": {"Yes": 0.9972438677257847, "No": 0.0027554706944240685}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9094174439063621, "res": {"Yes": 0.9094174439063621, "No": 0.09057884428393456}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990186422467884, "res": {"Yes": 0.9990186422467884, "No": 0.0009806366052526566}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888534908583776, "res": {"Yes": 0.9888534908583776, "No": 0.011144892029371278}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963587076424061, "res": {"Yes": 0.9963587076424061, "No": 0.0036397302762876406}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9742158451116216, "res": {"Yes": 0.9742158451116216, "No": 0.025782745238480766}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03666416615338659, "res": {"No": 0.9633121947024256, "Yes": 0.03666416615338659}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.47529533987848643, "res": {"No": 0.5246930258858346, "Yes": 0.47529533987848643}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18586443780486836, "res": {"No": 0.8141033951315146, "Yes": 0.18586443780486836}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.14438190095873885, "res": {"No": 0.8555948080414902, "Yes": 0.14438190095873885}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.16141462723858416, "res": {"No": 0.8385593156751785, "Yes": 0.16141462723858416}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7661836622736715, "res": {"Yes": 0.7661836622736715, "No": 0.23381281959239228}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8733343821156874, "res": {"Yes": 0.8733343821156874, "No": 0.12666281794958198}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7918051897197501, "res": {"Yes": 0.7918051897197501, "No": 0.20819142708372967}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9557095758848236, "res": {"Yes": 0.9557095758848236, "No": 0.04428687151127085}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.09918300499334189, "res": {"No": 0.9008114777034394, "Yes": 0.09918300499334189}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8782208758125564, "res": {"Yes": 0.8782208758125564, "No": 0.1217536181008759}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.33196823246173635, "res": {"No": 0.6680232237212774, "Yes": 0.33196823246173635}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.10265978665734063, "res": {"No": 0.8973332766987056, "Yes": 0.10265978665734063}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7217661821333197, "res": {"Yes": 0.7217661821333197, "No": 0.2782263744690189}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.717559563114622, "res": {"Yes": 0.717559563114622, "No": 0.2824156661122473}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9900685725997059, "res": {"Yes": 0.9900685725997059, "No": 0.009927034361124645}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9590871905840188, "res": {"Yes": 0.9590871905840188, "No": 0.040907256955406}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9849905172635436, "res": {"Yes": 0.9849905172635436, "No": 0.015004692402000419}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967899024162563, "res": {"Yes": 0.9967899024162563, "No": 0.003208222607564171}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9214657271701475, "res": {"Yes": 0.9214657271701475, "No": 0.07853078228797955}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6246463808535347, "res": {"Yes": 0.6246463808535347, "No": 0.3753484967124196}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9432075076745456, "res": {"Yes": 0.9432075076745456, "No": 0.056792000468253015}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824122698491106, "res": {"Yes": 0.9824122698491106, "No": 0.017587356777509083}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5023009183188286, "res": {"Yes": 0.5023009183188286, "No": 0.4976985233848342}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9773162962589459, "res": {"Yes": 0.9773162962589459, "No": 0.022682127486557933}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9900595679669868, "res": {"Yes": 0.9900595679669868, "No": 0.009939600005686}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.995578264784607, "res": {"Yes": 0.995578264784607, "No": 0.004421454343598948}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931287265011464, "res": {"Yes": 0.9931287265011464, "No": 0.0068706710445487805}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9873584571617312, "res": {"Yes": 0.9873584571617312, "No": 0.012640786109373619}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6162641083580476, "res": {"Yes": 0.6162641083580476, "No": 0.38373170710449406}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.029157392391501932, "res": {"No": 0.970841715327333, "Yes": 0.029157392391501932}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.981492714613246, "res": {"Yes": 0.981492714613246, "No": 0.01850688229745562}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989770030185493, "res": {"Yes": 0.9989770030185493, "No": 0.001022392714752198}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943319516439904, "res": {"Yes": 0.9943319516439904, "No": 0.005666801101960811}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967759178510622, "res": {"Yes": 0.9967759178510622, "No": 0.003223808030500454}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03345032086758318, "res": {"No": 0.9665488252697972, "Yes": 0.03345032086758318}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9874780542481342, "res": {"Yes": 0.9874780542481342, "No": 0.012521559003699854}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5005322533727405, "res": {"Yes": 0.5005322533727405, "No": 0.49946414262841743}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1771746162900052, "res": {"No": 0.8228237967102171, "Yes": 0.1771746162900052}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8431825722437631, "res": {"Yes": 0.8431825722437631, "No": 0.15681547238644797}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03101022469523591, "res": {"No": 0.9689856241111783, "Yes": 0.03101022469523591}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7702333023204943, "res": {"Yes": 0.7702333023204943, "No": 0.2297614459123242}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7220066713753044, "res": {"Yes": 0.7220066713753044, "No": 0.27799120569014046}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9607137031060103, "res": {"Yes": 0.9607137031060103, "No": 0.039285562264864377}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9289489344298951, "res": {"Yes": 0.9289489344298951, "No": 0.0710488223161533}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07565781041443073, "res": {"No": 0.9243416634947297, "Yes": 0.07565781041443073}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9881182709098446, "res": {"Yes": 0.9881182709098446, "No": 0.011881472796051668}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938800352423989, "res": {"Yes": 0.9938800352423989, "No": 0.006119560492424729}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938067984241239, "res": {"Yes": 0.9938067984241239, "No": 0.006192116055853247}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9047166845864117, "res": {"Yes": 0.9047166845864117, "No": 0.09528262537510515}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6051076154455429, "res": {"Yes": 0.6051076154455429, "No": 0.3948882166321014}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909774774542196, "res": {"Yes": 0.9909774774542196, "No": 0.00902077525638079}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873376576875542, "res": {"Yes": 0.9873376576875542, "No": 0.01266054771213646}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940760183863, "res": {"Yes": 0.9940760183863, "No": 0.005921668669239991}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9670391928486194, "res": {"Yes": 0.9670391928486194, "No": 0.032958113883058984}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04116481056864576, "res": {"No": 0.9588322003256207, "Yes": 0.04116481056864576}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9786633410806196, "res": {"Yes": 0.9786633410806196, "No": 0.021334239684118295}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931194392019732, "res": {"Yes": 0.9931194392019732, "No": 0.006879605285336853}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9107492013577224, "res": {"Yes": 0.9107492013577224, "No": 0.08924895206142548}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9734892196860706, "res": {"Yes": 0.9734892196860706, "No": 0.026509842496106575}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.460684247572763, "res": {"No": 0.5393140093833464, "Yes": 0.460684247572763}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976193425704264, "res": {"Yes": 0.9976193425704264, "No": 0.0023802498924090686}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989879476707526, "res": {"Yes": 0.9989879476707526, "No": 0.001011703232136196}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985838614718363, "res": {"Yes": 0.9985838614718363, "No": 0.001415578496481568}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9778518705735701, "res": {"Yes": 0.9778518705735701, "No": 0.022146522335209754}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9124359434029702, "res": {"Yes": 0.9124359434029702, "No": 0.0875608148658254}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.91726837797281, "res": {"Yes": 0.91726837797281, "No": 0.0827283276944854}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6372545026497448, "res": {"Yes": 0.6372545026497448, "No": 0.36273809735855184}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9781760450330835, "res": {"Yes": 0.9781760450330835, "No": 0.021821966039522127}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821334634850534, "res": {"Yes": 0.9821334634850534, "No": 0.017864763960367683}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9936082099549026, "res": {"Yes": 0.9936082099549026, "No": 0.006391222591630947}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941147786629313, "res": {"Yes": 0.9941147786629313, "No": 0.005883908658922475}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903868654536031, "res": {"Yes": 0.9903868654536031, "No": 0.009611826611733147}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952315936203103, "res": {"Yes": 0.9952315936203103, "No": 0.004766651081524383}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.991207344489343, "res": {"Yes": 0.991207344489343, "No": 0.008790919194493603}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9887997540644198, "res": {"Yes": 0.9887997540644198, "No": 0.011199045808277013}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9789990204636325, "res": {"Yes": 0.9789990204636325, "No": 0.020999549371267977}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957398114588469, "res": {"Yes": 0.9957398114588469, "No": 0.004259175423479526}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990999048288999, "res": {"Yes": 0.9990999048288999, "No": 0.0008998567265812176}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9859380588418619, "res": {"Yes": 0.9859380588418619, "No": 0.014060434356659797}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.000918798609993857, "res": {"No": 0.9990807451738288, "Yes": 0.000918798609993857}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9280272336694294, "res": {"Yes": 0.9280272336694294, "No": 0.07197133242622453}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9737384940404816, "res": {"Yes": 0.9737384940404816, "No": 0.026260743433945903}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945275258417496, "res": {"Yes": 0.9945275258417496, "No": 0.0054719075166593965}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.05474466817276937, "res": {"No": 0.9452542878632627, "Yes": 0.05474466817276937}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8275229152246152, "res": {"Yes": 0.8275229152246152, "No": 0.17247642819376824}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9787415580016429, "res": {"Yes": 0.9787415580016429, "No": 0.021257872109509048}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912594665154926, "res": {"Yes": 0.9912594665154926, "No": 0.008740119645429346}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7169094402897566, "res": {"Yes": 0.7169094402897566, "No": 0.2830891254501427}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9625906975874459, "res": {"Yes": 0.9625906975874459, "No": 0.037408078320930245}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9993537754530819, "res": {"Yes": 0.9993537754530819, "No": 0.00064589758752011}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994136576658903, "res": {"Yes": 0.9994136576658903, "No": 0.0005862747626699304}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985517701983712, "res": {"Yes": 0.9985517701983712, "No": 0.0014479637787599897}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994973718898517, "res": {"Yes": 0.9994973718898517, "No": 0.0005025896630591146}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995838336812358, "res": {"Yes": 0.9995838336812358, "No": 0.00041603032654980685}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6419251569804375, "res": {"Yes": 0.6419251569804375, "No": 0.3580737887475499}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.955854298473911, "res": {"Yes": 0.955854298473911, "No": 0.044144960650426696}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9695085988239306, "res": {"Yes": 0.9695085988239306, "No": 0.030490634746877424}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9300827949216616, "res": {"Yes": 0.9300827949216616, "No": 0.06991658432606175}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8578775974378273, "res": {"Yes": 0.8578775974378273, "No": 0.14212096753199502}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.985892757014791, "res": {"Yes": 0.985892757014791, "No": 0.01410636168524371}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969616667053882, "res": {"Yes": 0.9969616667053882, "No": 0.003037589517402287}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920660204885166, "res": {"Yes": 0.9920660204885166, "No": 0.0079335767870531}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985158474434828, "res": {"Yes": 0.985158474434828, "No": 0.014841242772079585}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871995062333002, "res": {"Yes": 0.9871995062333002, "No": 0.012799736425837803}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9046177592257949, "res": {"Yes": 0.9046177592257949, "No": 0.09537692558131866}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983070923874037, "res": {"Yes": 0.9983070923874037, "No": 0.0016922386189387579}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99872022431226, "res": {"Yes": 0.99872022431226, "No": 0.0012789700159684348}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968517242462585, "res": {"Yes": 0.9968517242462585, "No": 0.003147288470072394}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973019607678416, "res": {"Yes": 0.9973019607678416, "No": 0.0026975643653621806}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9922776034086079, "res": {"Yes": 0.9922776034086079, "No": 0.007720844729591071}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984566927253736, "res": {"Yes": 0.9984566927253736, "No": 0.001542878579320144}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9868838419187669, "res": {"Yes": 0.9868838419187669, "No": 0.013114581187861457}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963248606167582, "res": {"Yes": 0.9963248606167582, "No": 0.0036746918506361244}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951235626160125, "res": {"Yes": 0.9951235626160125, "No": 0.0048744144235171195}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.38303220774812363, "res": {"No": 0.6169667168800901, "Yes": 0.38303220774812363}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955423409866415, "res": {"Yes": 0.9955423409866415, "No": 0.004457332883554524}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964436846223255, "res": {"Yes": 0.9964436846223255, "No": 0.0035559500788415964}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971524714222928, "res": {"Yes": 0.9971524714222928, "No": 0.002847182415087383}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970531532019695, "res": {"Yes": 0.9970531532019695, "No": 0.0029464035145664812}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.996271143416656, "res": {"Yes": 0.996271143416656, "No": 0.003728544634067669}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99927127180146, "res": {"Yes": 0.99927127180146, "No": 0.000728428342676127}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992612806875604, "res": {"Yes": 0.9992612806875604, "No": 0.0007385357856177187}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991816541277748, "res": {"Yes": 0.9991816541277748, "No": 0.0008180588337894067}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997687155703684, "res": {"Yes": 0.9997687155703684, "No": 0.000231000195759296}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.019153025237189494, "res": {"No": 0.9808463633158102, "Yes": 0.019153025237189494}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966374809719647, "res": {"Yes": 0.9966374809719647, "No": 0.003360801808477556}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902752125613905, "res": {"Yes": 0.9902752125613905, "No": 0.009722511588588837}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9916292861694684, "res": {"Yes": 0.9916292861694684, "No": 0.008370246035419929}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914894604331147, "res": {"Yes": 0.9914894604331147, "No": 0.008507254575685304}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.013543217641942249, "res": {"No": 0.9864502821653413, "Yes": 0.013543217641942249}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9001321527935167, "res": {"Yes": 0.9001321527935167, "No": 0.09986076631956035}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963292405701053, "res": {"Yes": 0.9963292405701053, "No": 0.0036701281473177083}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9249994963096206, "res": {"Yes": 0.9249994963096206, "No": 0.0749981899302797}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.5554156314583475, "res": {"Yes": 0.5554156314583475, "No": 0.44457620451198065}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.10172417451457551, "res": {"No": 0.8982750507349838, "Yes": 0.10172417451457551}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987649410139061, "res": {"Yes": 0.9987649410139061, "No": 0.0012346542593986163}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.991080627722036, "res": {"Yes": 0.991080627722036, "No": 0.00891888803423525}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877535146090545, "res": {"Yes": 0.9877535146090545, "No": 0.012246023584917087}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997688347528041, "res": {"Yes": 0.9997688347528041, "No": 0.00023088864960530878}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3951802851922086, "res": {"No": 0.6048187213395988, "Yes": 0.3951802851922086}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972286919835254, "res": {"Yes": 0.9972286919835254, "No": 0.002771082059118698}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986868154823748, "res": {"Yes": 0.9986868154823748, "No": 0.001312891923095829}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998607127413711, "res": {"Yes": 0.9998607127413711, "No": 0.00013908114752942137}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988147609512997, "res": {"Yes": 0.9988147609512997, "No": 0.0011846688439886056}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7019343344507321, "res": {"Yes": 0.7019343344507321, "No": 0.2980635679371067}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9577194687588196, "res": {"Yes": 0.9577194687588196, "No": 0.04228005572423536}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952008911992344, "res": {"Yes": 0.9952008911992344, "No": 0.004798703988122507}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927702453281686, "res": {"Yes": 0.9927702453281686, "No": 0.007229336625962017}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956919380370277, "res": {"Yes": 0.9956919380370277, "No": 0.004304672148430583}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9920619143357554, "res": {"Yes": 0.9920619143357554, "No": 0.007937144525143979}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925586810350597, "res": {"Yes": 0.9925586810350597, "No": 0.007439331398264321}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9173038861189771, "res": {"Yes": 0.9173038861189771, "No": 0.0826952347112676}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903623061446257, "res": {"Yes": 0.9903623061446257, "No": 0.009636920226333034}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.985052160883944, "res": {"Yes": 0.985052160883944, "No": 0.014947184868306679}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.313521199244749, "res": {"No": 0.6864750487693717, "Yes": 0.313521199244749}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.13575965490614375, "res": {"No": 0.8642326548324711, "Yes": 0.13575965490614375}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.17457198515943326, "res": {"No": 0.8254194645346835, "Yes": 0.17457198515943326}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36182471396242355, "res": {"No": 0.6381669645201136, "Yes": 0.36182471396242355}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.08060261385762886, "res": {"No": 0.9193917250786815, "Yes": 0.08060261385762886}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.899660000730169, "res": {"Yes": 0.899660000730169, "No": 0.1003397752312296}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983901464066914, "res": {"Yes": 0.9983901464066914, "No": 0.0016095413763111324}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996535900584859, "res": {"Yes": 0.996535900584859, "No": 0.0034638278028888115}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854807882484798, "res": {"Yes": 0.9854807882484798, "No": 0.014519016587685112}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983441632129365, "res": {"Yes": 0.9983441632129365, "No": 0.001655668208962332}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9083327525024053, "res": {"Yes": 0.9083327525024053, "No": 0.09166551808903277}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967056910822755, "res": {"Yes": 0.9967056910822755, "No": 0.0032939000194820616}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957383909374451, "res": {"Yes": 0.9957383909374451, "No": 0.004260925026082253}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968732887747435, "res": {"Yes": 0.9968732887747435, "No": 0.003126035783473202}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924501775726935, "res": {"Yes": 0.9924501775726935, "No": 0.007549522218813268}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.024369313052160304, "res": {"No": 0.9756296915113548, "Yes": 0.024369313052160304}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9832036444639972, "res": {"Yes": 0.9832036444639972, "No": 0.01679561396786282}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8598528921549897, "res": {"Yes": 0.8598528921549897, "No": 0.1401466926912174}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9670506755401679, "res": {"Yes": 0.9670506755401679, "No": 0.03294894621105901}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7508750644339468, "res": {"Yes": 0.7508750644339468, "No": 0.2491244520375042}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9900776872129399, "res": {"Yes": 0.9900776872129399, "No": 0.009921521424896844}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998709628860046, "res": {"Yes": 0.9998709628860046, "No": 0.00012890008352969313}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952965311169326, "res": {"Yes": 0.9952965311169326, "No": 0.004703200709601135}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983013912715394, "res": {"Yes": 0.9983013912715394, "No": 0.001698327390833887}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997715758127782, "res": {"Yes": 0.9997715758127782, "No": 0.0002278406675130907}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9925802916549434, "res": {"Yes": 0.9925802916549434, "No": 0.007418382036204855}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980265654874951, "res": {"Yes": 0.9980265654874951, "No": 0.0019733118611253713}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990971656504007, "res": {"Yes": 0.9990971656504007, "No": 0.0009025422991421325}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987461522248126, "res": {"Yes": 0.9987461522248126, "No": 0.0012533854859297499}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9935066521905492, "res": {"Yes": 0.9935066521905492, "No": 0.006492296232367404}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6466297531369947, "res": {"Yes": 0.6466297531369947, "No": 0.3533682021793374}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9472844436624768, "res": {"Yes": 0.9472844436624768, "No": 0.052715043961862544}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861573430310263, "res": {"Yes": 0.9861573430310263, "No": 0.013842039180701093}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973498622316291, "res": {"Yes": 0.9973498622316291, "No": 0.002649929349249432}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929206636977841, "res": {"Yes": 0.9929206636977841, "No": 0.007078827445744694}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9403299496893259, "res": {"Yes": 0.9403299496893259, "No": 0.05966960602703153}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956021283847049, "res": {"Yes": 0.9956021283847049, "No": 0.0043968034091758324}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9809209212623083, "res": {"Yes": 0.9809209212623083, "No": 0.019078132406722972}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9835598548312773, "res": {"Yes": 0.9835598548312773, "No": 0.016439246890231813}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944917954964688, "res": {"Yes": 0.9944917954964688, "No": 0.005507290054179094}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.025540501313687552, "res": {"No": 0.9744588165848934, "Yes": 0.025540501313687552}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9549115171431627, "res": {"Yes": 0.9549115171431627, "No": 0.04508788021333055}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9609053096587571, "res": {"Yes": 0.9609053096587571, "No": 0.039093658963074915}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9093612581947147, "res": {"Yes": 0.9093612581947147, "No": 0.09063823818058563}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.883878737966307, "res": {"Yes": 0.883878737966307, "No": 0.11612067812254125}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9855890384177166, "res": {"Yes": 0.9855890384177166, "No": 0.014410741686954645}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960612901456554, "res": {"Yes": 0.9960612901456554, "No": 0.003938697082826951}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998785501401099, "res": {"Yes": 0.998785501401099, "No": 0.0012141243033860004}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998384286940496, "res": {"Yes": 0.9998384286940496, "No": 0.00016131251298330822}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996124200785106, "res": {"Yes": 0.9996124200785106, "No": 0.00038720716425822766}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.4993698676806343, "res": {"No": 0.5006248954492423, "Yes": 0.4993698676806343}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5376204236900141, "res": {"Yes": 0.5376204236900141, "No": 0.4623717767162839}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8604895564593072, "res": {"Yes": 0.8604895564593072, "No": 0.1395045028147099}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7553270233520769, "res": {"Yes": 0.7553270233520769, "No": 0.24467029731101314}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846886422682382, "res": {"Yes": 0.9846886422682382, "No": 0.015307383274820742}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.005297570286752366, "res": {"No": 0.9947020601489462, "Yes": 0.005297570286752366}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863282638396479, "res": {"Yes": 0.9863282638396479, "No": 0.013671088561466177}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976389169518178, "res": {"Yes": 0.9976389169518178, "No": 0.002360206886536997}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931371918671771, "res": {"Yes": 0.9931371918671771, "No": 0.006862635687696686}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.12930271444981165, "res": {"No": 0.8706946044173045, "Yes": 0.12930271444981165}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6679784943837719, "res": {"Yes": 0.6679784943837719, "No": 0.3320194591120396}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.01287401937991254, "res": {"No": 0.98712434175012, "Yes": 0.01287401937991254}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727317641409562, "res": {"Yes": 0.9727317641409562, "No": 0.02726561212259235}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9133591703169995, "res": {"Yes": 0.9133591703169995, "No": 0.08663653241510705}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6778520704666624, "res": {"Yes": 0.6778520704666624, "No": 0.3221438803026269}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8136020781386534, "res": {"Yes": 0.8136020781386534, "No": 0.18639660321354282}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7448321187172826, "res": {"Yes": 0.7448321187172826, "No": 0.25516438358652893}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7136185165306083, "res": {"Yes": 0.7136185165306083, "No": 0.28637787470638115}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8768594251945396, "res": {"Yes": 0.8768594251945396, "No": 0.12313770463010934}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.990831512267868, "res": {"Yes": 0.990831512267868, "No": 0.009167392356019268}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9971817472514538, "res": {"Yes": 0.9971817472514538, "No": 0.0028179601692764115}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987356828232706, "res": {"Yes": 0.9987356828232706, "No": 0.0012639544756572676}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960884936482038, "res": {"Yes": 0.9960884936482038, "No": 0.003910732040009443}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990482586933288, "res": {"Yes": 0.9990482586933288, "No": 0.0009513679135924177}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9884921408349363, "res": {"Yes": 0.9884921408349363, "No": 0.011506613752633961}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.14873591918175816, "res": {"No": 0.8512565389176651, "Yes": 0.14873591918175816}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9881844978037508, "res": {"Yes": 0.9881844978037508, "No": 0.011812759714396933}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986948996851287, "res": {"Yes": 0.9986948996851287, "No": 0.001304699803893913}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993418883033515, "res": {"Yes": 0.993418883033515, "No": 0.0065798824404336995}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971806807661454, "res": {"Yes": 0.9971806807661454, "No": 0.0028186027374449934}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.04944409211210997, "res": {"No": 0.9505549605004191, "Yes": 0.04944409211210997}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873225466003412, "res": {"Yes": 0.9873225466003412, "No": 0.012677118543505112}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8008904882897699, "res": {"Yes": 0.8008904882897699, "No": 0.1991088736690155}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968400003420722, "res": {"Yes": 0.9968400003420722, "No": 0.0031594383642877424}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7889162348463413, "res": {"Yes": 0.7889162348463413, "No": 0.21108294890205961}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8734719693176736, "res": {"Yes": 0.8734719693176736, "No": 0.12652775345207948}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996819847975578, "res": {"Yes": 0.9996819847975578, "No": 0.0003175313513807967}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955066560127869, "res": {"Yes": 0.9955066560127869, "No": 0.004493227571244322}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941303332015023, "res": {"Yes": 0.9941303332015023, "No": 0.005869654532150645}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9900516267310405, "res": {"Yes": 0.9900516267310405, "No": 0.009948246246299319}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9711002023596149, "res": {"Yes": 0.9711002023596149, "No": 0.02889814132343448}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975934768309583, "res": {"Yes": 0.9975934768309583, "No": 0.0024060882090166614}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9519128942628418, "res": {"Yes": 0.9519128942628418, "No": 0.048085504934469815}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7525596623402817, "res": {"Yes": 0.7525596623402817, "No": 0.24743805775340383}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8877338627682693, "res": {"Yes": 0.8877338627682693, "No": 0.11226453196796303}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3251264485044578, "res": {"No": 0.6748717541584881, "Yes": 0.3251264485044578}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8646318842247341, "res": {"Yes": 0.8646318842247341, "No": 0.13536718552240826}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9767739189927288, "res": {"Yes": 0.9767739189927288, "No": 0.023225905659862565}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8486648068436213, "res": {"Yes": 0.8486648068436213, "No": 0.15133464533983398}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.3024078125071683, "res": {"No": 0.6975916333060009, "Yes": 0.3024078125071683}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9273053423538538, "res": {"Yes": 0.9273053423538538, "No": 0.07269315157856639}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.998818091005264, "res": {"Yes": 0.998818091005264, "No": 0.0011811077614428205}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935766643886175, "res": {"Yes": 0.9935766643886175, "No": 0.006421176003115144}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993677023844415, "res": {"Yes": 0.9993677023844415, "No": 0.0006319561178561292}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984555025657053, "res": {"Yes": 0.9984555025657053, "No": 0.0015420000059228204}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.11077621431074963, "res": {"No": 0.8892231789825521, "Yes": 0.11077621431074963}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906203142721065, "res": {"Yes": 0.9906203142721065, "No": 0.009378960903301085}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976097319510203, "res": {"Yes": 0.9976097319510203, "No": 0.002388923578637633}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9456662532604366, "res": {"Yes": 0.9456662532604366, "No": 0.0543325763037699}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969741120549448, "res": {"Yes": 0.9969741120549448, "No": 0.003025739967675213}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9921392341900218, "res": {"Yes": 0.9921392341900218, "No": 0.00785996598624603}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998795371462323, "res": {"Yes": 0.9998795371462323, "No": 0.00012034727936678232}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998746577259993, "res": {"Yes": 0.9998746577259993, "No": 0.0001252285612894254}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997646636059545, "res": {"Yes": 0.9997646636059545, "No": 0.00023519984522429256}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994202100431919, "res": {"Yes": 0.9994202100431919, "No": 0.0005795080523153429}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9233831886488351, "res": {"Yes": 0.9233831886488351, "No": 0.07661568179624091}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7737519227582415, "res": {"Yes": 0.7737519227582415, "No": 0.22624230681677418}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915875503627583, "res": {"Yes": 0.9915875503627583, "No": 0.008410414665249579}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9085503660431222, "res": {"Yes": 0.9085503660431222, "No": 0.091445391415212}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.1928448884488784, "res": {"No": 0.8071269678018066, "Yes": 0.1928448884488784}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.984033697587988, "res": {"Yes": 0.984033697587988, "No": 0.015964448463724797}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9874839811092023, "res": {"Yes": 0.9874839811092023, "No": 0.012515088282352847}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8850999787640667, "res": {"Yes": 0.8850999787640667, "No": 0.11489836775683511}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9320984377225047, "res": {"Yes": 0.9320984377225047, "No": 0.06789964993684974}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9897344719299994, "res": {"Yes": 0.9897344719299994, "No": 0.010263644375689513}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9181230279191924, "res": {"Yes": 0.9181230279191924, "No": 0.08187598127994311}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987040592574046, "res": {"Yes": 0.9987040592574046, "No": 0.0012958627363130037}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998808696066603, "res": {"Yes": 0.998808696066603, "No": 0.0011912230816229592}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997803949167271, "res": {"Yes": 0.9997803949167271, "No": 0.00021952597397272678}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981627717737616, "res": {"Yes": 0.9981627717737616, "No": 0.0018372102984766782}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.08923918876013229, "res": {"No": 0.9107258591548232, "Yes": 0.08923918876013229}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894647981143276, "res": {"Yes": 0.9894647981143276, "No": 0.010534797010688884}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975045089277639, "res": {"Yes": 0.9975045089277639, "No": 0.0024944413067598177}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978251025243363, "res": {"Yes": 0.9978251025243363, "No": 0.0021736597745004938}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951825836520333, "res": {"Yes": 0.9951825836520333, "No": 0.004816130857196324}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9705883053217519, "res": {"Yes": 0.9705883053217519, "No": 0.029411147789298334}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9678589327068967, "res": {"Yes": 0.9678589327068967, "No": 0.03214078482572501}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978347188107514, "res": {"Yes": 0.9978347188107514, "No": 0.0021650576277143854}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989734342794782, "res": {"Yes": 0.9989734342794782, "No": 0.0010265077208542121}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.995444401063751, "res": {"Yes": 0.995444401063751, "No": 0.0045554028996884575}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9869696458232884, "res": {"Yes": 0.9869696458232884, "No": 0.013030149687485136}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984415992694788, "res": {"Yes": 0.9984415992694788, "No": 0.00155811472215285}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972091257507589, "res": {"Yes": 0.9972091257507589, "No": 0.002790514476414259}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904807665027581, "res": {"Yes": 0.9904807665027581, "No": 0.009517357902003072}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920288276385454, "res": {"Yes": 0.9920288276385454, "No": 0.007970133786977588}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9806743712936782, "res": {"Yes": 0.9806743712936782, "No": 0.019324835438830346}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.993581961756083, "res": {"Yes": 0.993581961756083, "No": 0.006417610672147076}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987246255821145, "res": {"Yes": 0.9987246255821145, "No": 0.001274981640606163}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992022958813361, "res": {"Yes": 0.992022958813361, "No": 0.007976108843811712}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.981790686059245, "res": {"Yes": 0.981790686059245, "No": 0.018208322667522838}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.25791433089401095, "res": {"No": 0.742072842609818, "Yes": 0.25791433089401095}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9589003728419693, "res": {"Yes": 0.9589003728419693, "No": 0.04109712957266598}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932912511125491, "res": {"Yes": 0.9932912511125491, "No": 0.006705878159377767}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995964906555811, "res": {"Yes": 0.995964906555811, "No": 0.00403372504971226}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9881956771982106, "res": {"Yes": 0.9881956771982106, "No": 0.011802306506017195}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.893355709956753, "res": {"Yes": 0.893355709956753, "No": 0.10664361548911779}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.989568094725592, "res": {"Yes": 0.989568094725592, "No": 0.010431479182860387}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995440480277076, "res": {"Yes": 0.9995440480277076, "No": 0.0004556555986160323}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991278672290491, "res": {"Yes": 0.9991278672290491, "No": 0.0008718492739242947}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986827715080405, "res": {"Yes": 0.9986827715080405, "No": 0.0013169853237248225}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6925085461402791, "res": {"Yes": 0.6925085461402791, "No": 0.30748991162417805}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9687864259895597, "res": {"Yes": 0.9687864259895597, "No": 0.031212706732815693}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.988664215223615, "res": {"Yes": 0.988664215223615, "No": 0.011335249316323527}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960900336022054, "res": {"Yes": 0.9960900336022054, "No": 0.00390976894531797}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9902886566289344, "res": {"Yes": 0.9902886566289344, "No": 0.009710631138262872}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8535145580582685, "res": {"Yes": 0.8535145580582685, "No": 0.14648273606920728}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896526203150076, "res": {"Yes": 0.9896526203150076, "No": 0.0103450255292635}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9705255364940278, "res": {"Yes": 0.9705255364940278, "No": 0.029471464171764133}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9836197141383629, "res": {"Yes": 0.9836197141383629, "No": 0.016378840346179063}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9047117086583304, "res": {"Yes": 0.9047117086583304, "No": 0.09528428330721064}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.20090030278892296, "res": {"No": 0.7990991486406879, "Yes": 0.20090030278892296}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983705389156284, "res": {"Yes": 0.9983705389156284, "No": 0.0016290412946344658}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983632904723178, "res": {"Yes": 0.9983632904723178, "No": 0.0016359984307521402}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967817253835106, "res": {"Yes": 0.9967817253835106, "No": 0.0032177198222632986}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9668116056845939, "res": {"Yes": 0.9668116056845939, "No": 0.033188269294501645}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.2900749909347294, "res": {"No": 0.7099220029367092, "Yes": 0.2900749909347294}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9784790956921661, "res": {"Yes": 0.9784790956921661, "No": 0.021519589912085062}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928559012688274, "res": {"Yes": 0.9928559012688274, "No": 0.007143303032554542}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9772433090941968, "res": {"Yes": 0.9772433090941968, "No": 0.022754180323083902}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914055611704582, "res": {"Yes": 0.9914055611704582, "No": 0.00859318870994657}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.13450646690742576, "res": {"No": 0.865492750919021, "Yes": 0.13450646690742576}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9138874272936518, "res": {"Yes": 0.9138874272936518, "No": 0.08611216399509035}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992847247314032, "res": {"Yes": 0.9992847247314032, "No": 0.0007150712999730292}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9924343234069865, "res": {"Yes": 0.9924343234069865, "No": 0.007564187802729733}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6624196576151815, "res": {"Yes": 0.6624196576151815, "No": 0.33756898147456}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8567899410973254, "res": {"Yes": 0.8567899410973254, "No": 0.14320199399592337}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9491835964665056, "res": {"Yes": 0.9491835964665056, "No": 0.050811920317747214}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9708822740580174, "res": {"Yes": 0.9708822740580174, "No": 0.029114818721637608}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9091588205403861, "res": {"Yes": 0.9091588205403861, "No": 0.0908371351100728}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.016413912181635498, "res": {"No": 0.9835362221390097, "Yes": 0.016413912181635498}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0882800792395987, "res": {"No": 0.911710840534179, "Yes": 0.0882800792395987}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9040534809472375, "res": {"Yes": 0.9040534809472375, "No": 0.0959416005715656}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4272470934223419, "res": {"No": 0.5727239162121598, "Yes": 0.4272470934223419}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.05968962853844206, "res": {"No": 0.9403010820029918, "Yes": 0.05968962853844206}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.39180476959524196, "res": {"No": 0.6081902103327712, "Yes": 0.39180476959524196}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9146620465181581, "res": {"Yes": 0.9146620465181581, "No": 0.08533522119811444}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.018853149439984423, "res": {"No": 0.981146250941575, "Yes": 0.018853149439984423}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.545499331058676, "res": {"Yes": 0.545499331058676, "No": 0.4544986662101694}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.022648177361090125, "res": {"No": 0.977351246691944, "Yes": 0.022648177361090125}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.17667411528540725, "res": {"No": 0.8233244232914498, "Yes": 0.17667411528540725}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5002228399228545, "res": {"Yes": 0.5002228399228545, "No": 0.4997746260886411}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992523580236433, "res": {"Yes": 0.9992523580236433, "No": 0.0007467157256098086}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923615325076003, "res": {"Yes": 0.9923615325076003, "No": 0.007637013159141912}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965863313608099, "res": {"Yes": 0.9965863313608099, "No": 0.003413578924807535}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999666497024485, "res": {"Yes": 0.999666497024485, "No": 0.00033326186354640664}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.36398569921065554, "res": {"No": 0.6360136204941668, "Yes": 0.36398569921065554}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985875479506824, "res": {"Yes": 0.9985875479506824, "No": 0.0014122507086091881}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997721716868126, "res": {"Yes": 0.9997721716868126, "No": 0.00022714931077317104}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997495497712234, "res": {"Yes": 0.997495497712234, "No": 0.0025042199253211}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977204294762014, "res": {"Yes": 0.9977204294762014, "No": 0.0022780934317103685}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9952328806546394, "res": {"Yes": 0.9952328806546394, "No": 0.004767076762472335}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891745030406612, "res": {"Yes": 0.9891745030406612, "No": 0.010824729433519453}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9844887038162679, "res": {"Yes": 0.9844887038162679, "No": 0.015510755383214234}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932690192185807, "res": {"Yes": 0.9932690192185807, "No": 0.006730202813796534}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872357479796048, "res": {"Yes": 0.9872357479796048, "No": 0.01276353885223676}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6727498583021282, "res": {"Yes": 0.6727498583021282, "No": 0.32724357751739724}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7207337202170189, "res": {"Yes": 0.7207337202170189, "No": 0.27926176813137576}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8920469850200939, "res": {"Yes": 0.8920469850200939, "No": 0.10795082280033727}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6121824867211461, "res": {"Yes": 0.6121824867211461, "No": 0.3878124238271214}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6021390381250108, "res": {"Yes": 0.6021390381250108, "No": 0.3978545073312784}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9353956257901821, "res": {"Yes": 0.9353956257901821, "No": 0.0646031311211846}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980633658006822, "res": {"Yes": 0.9980633658006822, "No": 0.0019363974824855462}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992957991658731, "res": {"Yes": 0.9992957991658731, "No": 0.0007040576612328343}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937659392950886, "res": {"Yes": 0.9937659392950886, "No": 0.006233595411798513}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.988358330791992, "res": {"Yes": 0.988358330791992, "No": 0.011641162156953738}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09505106552100605, "res": {"No": 0.9049484845966647, "Yes": 0.09505106552100605}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8069646950970237, "res": {"Yes": 0.8069646950970237, "No": 0.19303497285318086}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9841191291976866, "res": {"Yes": 0.9841191291976866, "No": 0.01588079121154095}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9747161895495576, "res": {"Yes": 0.9747161895495576, "No": 0.025283729320352687}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962970568657277, "res": {"Yes": 0.9962970568657277, "No": 0.0037029017868210573}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.869953888902168, "res": {"Yes": 0.869953888902168, "No": 0.13004514754625648}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942739621058165, "res": {"Yes": 0.9942739621058165, "No": 0.005725223448487518}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962967006895935, "res": {"Yes": 0.9962967006895935, "No": 0.0037026888760895555}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9862417538227237, "res": {"Yes": 0.9862417538227237, "No": 0.013757259920379999}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9841013492779948, "res": {"Yes": 0.9841013492779948, "No": 0.015897659211441286}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9982493580643699, "res": {"Yes": 0.9982493580643699, "No": 0.0017502957593213853}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993219875543854, "res": {"Yes": 0.9993219875543854, "No": 0.0006776538134876294}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992584255623376, "res": {"Yes": 0.9992584255623376, "No": 0.0007411244485563235}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9690532258132944, "res": {"Yes": 0.9690532258132944, "No": 0.030944679528766124}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995229698150956, "res": {"Yes": 0.9995229698150956, "No": 0.00047662826002441177}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9993362750329283, "res": {"Yes": 0.9993362750329283, "No": 0.0006633279795344562}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9679831121342979, "res": {"Yes": 0.9679831121342979, "No": 0.03201604806055863}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984362507320015, "res": {"Yes": 0.9984362507320015, "No": 0.0015621953052649881}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9844369456832686, "res": {"Yes": 0.9844369456832686, "No": 0.015562087673246463}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9717728113699864, "res": {"Yes": 0.9717728113699864, "No": 0.028226085806897252}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.38701121772613895, "res": {"No": 0.6129860072368591, "Yes": 0.38701121772613895}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.35480270661869745, "res": {"No": 0.6451958273315878, "Yes": 0.35480270661869745}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8256370407596917, "res": {"Yes": 0.8256370407596917, "No": 0.17436171773551998}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8774610079622954, "res": {"Yes": 0.8774610079622954, "No": 0.1225383054899505}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6660868909899125, "res": {"Yes": 0.6660868909899125, "No": 0.33391153044474486}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.00017349374692585755, "res": {"No": 0.9998256760361882, "Yes": 0.00017349374692585755}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9822476559059666, "res": {"Yes": 0.9822476559059666, "No": 0.01775195727723335}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909297115090676, "res": {"Yes": 0.9909297115090676, "No": 0.009070127115626435}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961013886948741, "res": {"Yes": 0.9961013886948741, "No": 0.003898553469130862}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988824364398056, "res": {"Yes": 0.9988824364398056, "No": 0.0011175163507946629}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1753145832972948, "res": {"No": 0.8246790736207927, "Yes": 0.1753145832972948}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887454407856568, "res": {"Yes": 0.9887454407856568, "No": 0.011253631527238496}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9549333657686222, "res": {"Yes": 0.9549333657686222, "No": 0.04506483717926681}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9862596167951356, "res": {"Yes": 0.9862596167951356, "No": 0.013739991648664752}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9384700651012257, "res": {"Yes": 0.9384700651012257, "No": 0.06152826600010951}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9976175625198215, "res": {"Yes": 0.9976175625198215, "No": 0.002381917841589696}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.997681040151426, "res": {"Yes": 0.997681040151426, "No": 0.0023168703293220466}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9927110325185851, "res": {"Yes": 0.9927110325185851, "No": 0.0072878551088907005}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981158545347971, "res": {"Yes": 0.9981158545347971, "No": 0.0018833458057343718}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888210836939646, "res": {"Yes": 0.9888210836939646, "No": 0.011177434610208254}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.027607549805161728, "res": {"No": 0.9723909037658054, "Yes": 0.027607549805161728}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9898547646391953, "res": {"Yes": 0.9898547646391953, "No": 0.010144607453965419}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982406932975469, "res": {"Yes": 0.9982406932975469, "No": 0.0017592441894137619}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960177652103819, "res": {"Yes": 0.9960177652103819, "No": 0.003982139664489645}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996877047743089, "res": {"Yes": 0.9996877047743089, "No": 0.0003122242120122616}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.02546795458619965, "res": {"No": 0.9745305978649952, "Yes": 0.02546795458619965}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.1665602788409075, "res": {"No": 0.8334386373144255, "Yes": 0.1665602788409075}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966688657797224, "res": {"Yes": 0.9966688657797224, "No": 0.003331142302595396}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98966184886872, "res": {"Yes": 0.98966184886872, "No": 0.010337702809601099}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9758021971141441, "res": {"Yes": 0.9758021971141441, "No": 0.024197237626503058}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9111638924555832, "res": {"Yes": 0.9111638924555832, "No": 0.08883568070028622}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.5272451280338696, "res": {"Yes": 0.5272451280338696, "No": 0.4727511211012771}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2309554602088156, "res": {"No": 0.769043656966603, "Yes": 0.2309554602088156}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4592222414207534, "res": {"No": 0.5407761805848051, "Yes": 0.4592222414207534}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6846097302109383, "res": {"Yes": 0.6846097302109383, "No": 0.3153885525922852}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.997829734239647, "res": {"Yes": 0.997829734239647, "No": 0.002170032792050365}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984638228299204, "res": {"Yes": 0.9984638228299204, "No": 0.0015359191229075647}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994611711504329, "res": {"Yes": 0.9994611711504329, "No": 0.0005385700426013964}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997446533065177, "res": {"Yes": 0.9997446533065177, "No": 0.0002552969960855808}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986604165440106, "res": {"Yes": 0.9986604165440106, "No": 0.0013393286473590814}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9937401579622482, "res": {"Yes": 0.9937401579622482, "No": 0.006259119867504474}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.99130901387566, "res": {"Yes": 0.99130901387566, "No": 0.00869022020482498}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9758973327076499, "res": {"Yes": 0.9758973327076499, "No": 0.024102035053418427}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9561996584359872, "res": {"Yes": 0.9561996584359872, "No": 0.04379986691827063}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9504009602597033, "res": {"Yes": 0.9504009602597033, "No": 0.049598524186418594}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9984601406019781, "res": {"Yes": 0.9984601406019781, "No": 0.0015391391690210067}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9900601545774547, "res": {"Yes": 0.9900601545774547, "No": 0.009938352664157601}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9876750066349473, "res": {"Yes": 0.9876750066349473, "No": 0.01232273929769198}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942613608569019, "res": {"Yes": 0.9942613608569019, "No": 0.005737395025339361}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986304619743333, "res": {"Yes": 0.9986304619743333, "No": 0.0013683891963342}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9957432390998994, "res": {"Yes": 0.9957432390998994, "No": 0.004255925635318221}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9647755484275432, "res": {"Yes": 0.9647755484275432, "No": 0.03522341213496797}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9907319317694169, "res": {"Yes": 0.9907319317694169, "No": 0.009264512852110617}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995515048567476, "res": {"Yes": 0.995515048567476, "No": 0.004484394940186332}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880785785020803, "res": {"Yes": 0.9880785785020803, "No": 0.011920627215252666}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8718226750961241, "res": {"Yes": 0.8718226750961241, "No": 0.12817684077443128}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998251993094213, "res": {"Yes": 0.9998251993094213, "No": 0.00017424181189278233}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994798689150959, "res": {"Yes": 0.9994798689150959, "No": 0.0005197383739598228}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995320154163259, "res": {"Yes": 0.995320154163259, "No": 0.004679582885755254}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997066487358987, "res": {"Yes": 0.9997066487358987, "No": 0.00029303368197734845}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9093060616415746, "res": {"Yes": 0.9093060616415746, "No": 0.09069291858925219}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9469446000333935, "res": {"Yes": 0.9469446000333935, "No": 0.05305435632242471}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6228154201099771, "res": {"Yes": 0.6228154201099771, "No": 0.37718211670159607}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.701075005367644, "res": {"Yes": 0.701075005367644, "No": 0.29892310876026607}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4786298528918689, "res": {"No": 0.5213683923775748, "Yes": 0.4786298528918689}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9270783565183269, "res": {"Yes": 0.9270783565183269, "No": 0.07292056361337663}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.018825790766308654, "res": {"No": 0.9811698909641354, "Yes": 0.018825790766308654}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9839502491647438, "res": {"Yes": 0.9839502491647438, "No": 0.016049613671617693}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894345654793026, "res": {"Yes": 0.9894345654793026, "No": 0.010565059459079963}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9818353870249781, "res": {"Yes": 0.9818353870249781, "No": 0.018163848617249033}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1298573550257039, "res": {"No": 0.8701402268798443, "Yes": 0.1298573550257039}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.00048779954741081093, "res": {"No": 0.9995102358736239, "Yes": 0.00048779954741081093}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7544303332706946, "res": {"Yes": 0.7544303332706946, "No": 0.2455666209818114}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6093363468817469, "res": {"Yes": 0.6093363468817469, "No": 0.39065643036263376}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8149496555341507, "res": {"Yes": 0.8149496555341507, "No": 0.18504545155465263}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3140566485345475, "res": {"No": 0.6859418648252651, "Yes": 0.3140566485345475}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925461163142452, "res": {"Yes": 0.9925461163142452, "No": 0.007453213479951816}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944280134238144, "res": {"Yes": 0.9944280134238144, "No": 0.0055706585524171294}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9692632772952812, "res": {"Yes": 0.9692632772952812, "No": 0.03073571947245385}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991439347930754, "res": {"Yes": 0.9991439347930754, "No": 0.0008554468448441229}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9987947806609042, "res": {"Yes": 0.9987947806609042, "No": 0.0012041179160167787}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.997544129496441, "res": {"Yes": 0.997544129496441, "No": 0.0024554063796716217}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981431688479476, "res": {"Yes": 0.9981431688479476, "No": 0.0018562003560247287}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962497214263348, "res": {"Yes": 0.9962497214263348, "No": 0.003749874148361183}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957977298514913, "res": {"Yes": 0.9957977298514913, "No": 0.004201328895699267}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9755837394594573, "res": {"Yes": 0.9755837394594573, "No": 0.02441552935879237}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998446263017435, "res": {"Yes": 0.9998446263017435, "No": 0.00015484204889589323}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982073078954949, "res": {"Yes": 0.9982073078954949, "No": 0.0017923494103176672}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99959777115572, "res": {"Yes": 0.99959777115572, "No": 0.0004020425617297431}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988543656406803, "res": {"Yes": 0.9988543656406803, "No": 0.0011456008726701747}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9855509579641312, "res": {"Yes": 0.9855509579641312, "No": 0.014448278593464816}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.988096734612769, "res": {"Yes": 0.988096734612769, "No": 0.011902533528147108}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996860139006234, "res": {"Yes": 0.996860139006234, "No": 0.00313939020547395}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9874816526247201, "res": {"Yes": 0.9874816526247201, "No": 0.01251744509536339}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8920851244121915, "res": {"Yes": 0.8920851244121915, "No": 0.10791302504096653}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5982190426758474, "res": {"Yes": 0.5982190426758474, "No": 0.401758587242717}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9504840916656262, "res": {"Yes": 0.9504840916656262, "No": 0.04950626756242868}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929547377450938, "res": {"Yes": 0.9929547377450938, "No": 0.007042922972671672}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9908116155800435, "res": {"Yes": 0.9908116155800435, "No": 0.009185335812632391}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9809063517520655, "res": {"Yes": 0.9809063517520655, "No": 0.019088987135228046}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6560515111672625, "res": {"Yes": 0.6560515111672625, "No": 0.3439482393279351}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9833425298207791, "res": {"Yes": 0.9833425298207791, "No": 0.01665735363997869}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8493950829250918, "res": {"Yes": 0.8493950829250918, "No": 0.15060467825328275}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9671018493476594, "res": {"Yes": 0.9671018493476594, "No": 0.03289781621031514}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9658024584882644, "res": {"Yes": 0.9658024584882644, "No": 0.03419729608093856}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.39547045400046477, "res": {"No": 0.6045267430230836, "Yes": 0.39547045400046477}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8645809934897577, "res": {"Yes": 0.8645809934897577, "No": 0.1354180931541997}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987858545554619, "res": {"Yes": 0.987858545554619, "No": 0.012137699607255277}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921285553568455, "res": {"Yes": 0.9921285553568455, "No": 0.007867979373713526}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9866187344120241, "res": {"Yes": 0.9866187344120241, "No": 0.013375160316386683}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.20933308238092585, "res": {"No": 0.7906660163694007, "Yes": 0.20933308238092585}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992001010898376, "res": {"Yes": 0.9992001010898376, "No": 0.0007997119161197346}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997677621813781, "res": {"Yes": 0.9997677621813781, "No": 0.00023203066924811015}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980200370178723, "res": {"Yes": 0.9980200370178723, "No": 0.001979602919687553}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974991727916662, "res": {"Yes": 0.9974991727916662, "No": 0.0025006782097470133}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.03536407870049749, "res": {"No": 0.9646349783297492, "Yes": 0.03536407870049749}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.2640899273333258, "res": {"No": 0.7359046060490034, "Yes": 0.2640899273333258}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5664717881607256, "res": {"Yes": 0.5664717881607256, "No": 0.4335261550411228}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5333937151097614, "res": {"Yes": 0.5333937151097614, "No": 0.4665992624902}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.03187268836550163, "res": {"No": 0.9681256610466289, "Yes": 0.03187268836550163}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8834312815885671, "res": {"Yes": 0.8834312815885671, "No": 0.11655929097877153}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9658548353757748, "res": {"Yes": 0.9658548353757748, "No": 0.0341437542959753}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898019800615955, "res": {"Yes": 0.9898019800615955, "No": 0.010196360867992426}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5015769217460977, "res": {"Yes": 0.5015769217460977, "No": 0.4984183621462751}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.992330769181519, "res": {"Yes": 0.992330769181519, "No": 0.007666899366726485}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.8782537052332433e-05, "res": {"No": 0.9999803763825457, "Yes": 1.8782537052332433e-05}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4361632234410378, "res": {"No": 0.5638345903613917, "Yes": 0.4361632234410378}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9830213763881989, "res": {"Yes": 0.9830213763881989, "No": 0.016976780864906636}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9240898516709765, "res": {"Yes": 0.9240898516709765, "No": 0.07590805300584655}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9798398804133502, "res": {"Yes": 0.9798398804133502, "No": 0.02015843636594583}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.5139004034034118, "res": {"Yes": 0.5139004034034118, "No": 0.48609288108395515}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964404920218745, "res": {"Yes": 0.9964404920218745, "No": 0.003557417210824288}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992815831703948, "res": {"Yes": 0.992815831703948, "No": 0.007182251253580964}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969533843810491, "res": {"Yes": 0.9969533843810491, "No": 0.003044885442617704}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9652560153001712, "res": {"Yes": 0.9652560153001712, "No": 0.0347421372069351}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6495762836244243, "res": {"Yes": 0.6495762836244243, "No": 0.35042145211657494}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9662373094336268, "res": {"Yes": 0.9662373094336268, "No": 0.03376164446447788}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9848080449742527, "res": {"Yes": 0.9848080449742527, "No": 0.0151910323835492}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904265035099241, "res": {"Yes": 0.9904265035099241, "No": 0.009572252789673425}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9334683978262688, "res": {"Yes": 0.9334683978262688, "No": 0.06652922195139109}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9900066407972391, "res": {"Yes": 0.9900066407972391, "No": 0.009991375581050706}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9836831468681398, "res": {"Yes": 0.9836831468681398, "No": 0.016314853964058348}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967555506377854, "res": {"Yes": 0.9967555506377854, "No": 0.003243948559420635}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994576006517389, "res": {"Yes": 0.9994576006517389, "No": 0.0005415055187379508}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993432997220758, "res": {"Yes": 0.9993432997220758, "No": 0.0006557593374332176}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8672909812472066, "res": {"Yes": 0.8672909812472066, "No": 0.13270791415134384}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9692174777797299, "res": {"Yes": 0.9692174777797299, "No": 0.030781552907760017}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9748507613324789, "res": {"Yes": 0.9748507613324789, "No": 0.02514821397169562}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9892219870294093, "res": {"Yes": 0.9892219870294093, "No": 0.01077786268345137}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7118209741409322, "res": {"Yes": 0.7118209741409322, "No": 0.28817824040058243}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7494604360181657, "res": {"Yes": 0.7494604360181657, "No": 0.25053363400130707}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9031555872676672, "res": {"Yes": 0.9031555872676672, "No": 0.09684044032398618}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43838967288554503, "res": {"No": 0.5616058554240579, "Yes": 0.43838967288554503}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9279478907329095, "res": {"Yes": 0.9279478907329095, "No": 0.07204922593762053}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9366069198443054, "res": {"Yes": 0.9366069198443054, "No": 0.06339088484728511}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9484433047343119, "res": {"Yes": 0.9484433047343119, "No": 0.05155512785718907}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991649904146931, "res": {"Yes": 0.9991649904146931, "No": 0.0008342656342691259}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966706366625363, "res": {"Yes": 0.9966706366625363, "No": 0.00332924276082681}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989406013593851, "res": {"Yes": 0.9989406013593851, "No": 0.001059065884941685}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996283836364722, "res": {"Yes": 0.9996283836364722, "No": 0.0003712547772190705}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8903358783906721, "res": {"Yes": 0.8903358783906721, "No": 0.10966361982736683}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9900868019100832, "res": {"Yes": 0.9900868019100832, "No": 0.009912611932791828}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9811213536902088, "res": {"Yes": 0.9811213536902088, "No": 0.018876704184542053}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789695403099995, "res": {"Yes": 0.9789695403099995, "No": 0.021029701150419564}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9707251048242549, "res": {"Yes": 0.9707251048242549, "No": 0.029255716163616097}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9960265182529634, "res": {"Yes": 0.9960265182529634, "No": 0.003972629742194543}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973556694178435, "res": {"Yes": 0.9973556694178435, "No": 0.002643744488235604}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995179656559027, "res": {"Yes": 0.9995179656559027, "No": 0.00048175879234040796}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993594935818668, "res": {"Yes": 0.9993594935818668, "No": 0.0006400075402751746}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963989441096124, "res": {"Yes": 0.9963989441096124, "No": 0.003600491194179175}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9034451492312338, "res": {"Yes": 0.9034451492312338, "No": 0.09655274882801963}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946440343052758, "res": {"Yes": 0.9946440343052758, "No": 0.005355289579895843}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904790064199998, "res": {"Yes": 0.9904790064199998, "No": 0.009520209729612955}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9883104706986452, "res": {"Yes": 0.9883104706986452, "No": 0.011688436023523112}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9763632719043936, "res": {"Yes": 0.9763632719043936, "No": 0.023636049765619876}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9783682892091145, "res": {"Yes": 0.9783682892091145, "No": 0.021631385208294593}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9910479543144927, "res": {"Yes": 0.9910479543144927, "No": 0.008951725249186289}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990984756474812, "res": {"Yes": 0.9990984756474812, "No": 0.0009013048601849454}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997097471315166, "res": {"Yes": 0.9997097471315166, "No": 0.00029012375054156676}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992891321163796, "res": {"Yes": 0.9992891321163796, "No": 0.0007106802130594575}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.46134513045687514, "res": {"No": 0.5386536141607534, "Yes": 0.46134513045687514}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8115678945753684, "res": {"Yes": 0.8115678945753684, "No": 0.18842967851000353}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8815619426638646, "res": {"Yes": 0.8815619426638646, "No": 0.11843535437079279}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9536516156367678, "res": {"Yes": 0.9536516156367678, "No": 0.04634764951067288}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8628828452593528, "res": {"Yes": 0.8628828452593528, "No": 0.13710356360625453}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.877948818786494, "res": {"Yes": 0.877948818786494, "No": 0.1220501808957077}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974716529671149, "res": {"Yes": 0.9974716529671149, "No": 0.002528175203290307}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996634024416733, "res": {"Yes": 0.9996634024416733, "No": 0.00033648768731321785}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974799579506711, "res": {"Yes": 0.9974799579506711, "No": 0.0025195884608039186}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998210279174143, "res": {"Yes": 0.9998210279174143, "No": 0.00017882122396577197}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9807938521688944, "res": {"Yes": 0.9807938521688944, "No": 0.019204531733732738}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9253651986824479, "res": {"Yes": 0.9253651986824479, "No": 0.07463220966572427}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.961308338266198, "res": {"Yes": 0.961308338266198, "No": 0.03868626312381013}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9681086783352386, "res": {"Yes": 0.9681086783352386, "No": 0.031889279229095326}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9519003586044784, "res": {"Yes": 0.9519003586044784, "No": 0.04809844648505522}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9208455637610407, "res": {"Yes": 0.9208455637610407, "No": 0.07915078300752544}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906538929073665, "res": {"Yes": 0.9906538929073665, "No": 0.009343424921560093}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9822683589579885, "res": {"Yes": 0.9822683589579885, "No": 0.017728845085843038}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.961115475903748, "res": {"Yes": 0.961115475903748, "No": 0.03888206455909219}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872783925487483, "res": {"Yes": 0.9872783925487483, "No": 0.012721217630287954}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8121812576697094, "res": {"Yes": 0.8121812576697094, "No": 0.18781756241379485}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.046667420107992796, "res": {"No": 0.953329523664931, "Yes": 0.046667420107992796}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9758942674189419, "res": {"Yes": 0.9758942674189419, "No": 0.024104715348738016}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869284308314538, "res": {"Yes": 0.9869284308314538, "No": 0.013071322114958029}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.8632208854096454, "res": {"Yes": 0.8632208854096454, "No": 0.1367763730955279}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.970575395612559, "res": {"Yes": 0.970575395612559, "No": 0.029423997384800046}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.994250626570917, "res": {"Yes": 0.994250626570917, "No": 0.0057481972216626735}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9721781397170463, "res": {"Yes": 0.9721781397170463, "No": 0.027821393607888247}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998242589956666, "res": {"Yes": 0.998242589956666, "No": 0.0017568261881962613}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9784541056551862, "res": {"Yes": 0.9784541056551862, "No": 0.02154565299390976}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.24821573792066232, "res": {"No": 0.7517837220914048, "Yes": 0.24821573792066232}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949167755351792, "res": {"Yes": 0.9949167755351792, "No": 0.005080990784957081}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889729861894092, "res": {"Yes": 0.9889729861894092, "No": 0.011025119706232597}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9723414239412022, "res": {"Yes": 0.9723414239412022, "No": 0.027652525970904625}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9540341419182173, "res": {"Yes": 0.9540341419182173, "No": 0.04596419293828231}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7508362076547901, "res": {"Yes": 0.7508362076547901, "No": 0.2491628949062446}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879271883025571, "res": {"Yes": 0.9879271883025571, "No": 0.01207232060659751}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969842998863567, "res": {"Yes": 0.9969842998863567, "No": 0.0030154807764696083}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989974858134221, "res": {"Yes": 0.989974858134221, "No": 0.010024842666774395}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945598271285435, "res": {"Yes": 0.9945598271285435, "No": 0.005439950050771027}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.992951097877583, "res": {"Yes": 0.992951097877583, "No": 0.0070481930498183685}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988195161199329, "res": {"Yes": 0.9988195161199329, "No": 0.0011798711988254486}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997344081057733, "res": {"Yes": 0.9997344081057733, "No": 0.00026531440533757823}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997929049571395, "res": {"Yes": 0.9997929049571395, "No": 0.00020654236690434086}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987091673421197, "res": {"Yes": 0.9987091673421197, "No": 0.001290580966968493}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9984300729268127, "res": {"Yes": 0.9984300729268127, "No": 0.0015694966753038379}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955245020232621, "res": {"Yes": 0.9955245020232621, "No": 0.004475056059818686}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964848747687244, "res": {"Yes": 0.9964848747687244, "No": 0.0035149344589351234}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985957529338899, "res": {"Yes": 0.985957529338899, "No": 0.014041893848316606}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914361370851533, "res": {"Yes": 0.9914361370851533, "No": 0.008563382628547219}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7991515872473136, "res": {"Yes": 0.7991515872473136, "No": 0.20084681016114977}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4862903526683509, "res": {"No": 0.5137053124605989, "Yes": 0.4862903526683509}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972026023291581, "res": {"Yes": 0.9972026023291581, "No": 0.0027969324945548266}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925044206254804, "res": {"Yes": 0.9925044206254804, "No": 0.007494723795059143}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.00040931779448569975, "res": {"No": 0.9995869316962124, "Yes": 0.00040931779448569975}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9368208278236045, "res": {"Yes": 0.9368208278236045, "No": 0.06317431302033047}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9446803071846555, "res": {"Yes": 0.9446803071846555, "No": 0.05531666829763786}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9842795074288488, "res": {"Yes": 0.9842795074288488, "No": 0.01571919031479535}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9784595840350605, "res": {"Yes": 0.9784595840350605, "No": 0.0215375576587234}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.841362136022545, "res": {"Yes": 0.841362136022545, "No": 0.15863336815703127}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9123826450140641, "res": {"Yes": 0.9123826450140641, "No": 0.08761450610002014}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8595222477227914, "res": {"Yes": 0.8595222477227914, "No": 0.14047702875233442}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9543263176273722, "res": {"Yes": 0.9543263176273722, "No": 0.04567217630888351}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960980747706766, "res": {"Yes": 0.9960980747706766, "No": 0.0039001022821427028}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951930943283946, "res": {"Yes": 0.9951930943283946, "No": 0.004805976650912455}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9329022156543314, "res": {"Yes": 0.9329022156543314, "No": 0.0670970634330619}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963324363012696, "res": {"Yes": 0.9963324363012696, "No": 0.0036674796379026525}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956742112938402, "res": {"Yes": 0.9956742112938402, "No": 0.004325746726392668}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928858704807927, "res": {"Yes": 0.9928858704807927, "No": 0.007114013555426001}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980683518382931, "res": {"Yes": 0.9980683518382931, "No": 0.0019314495418786973}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.09496065794980096, "res": {"No": 0.9050366399220847, "Yes": 0.09496065794980096}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9753328223272466, "res": {"Yes": 0.9753328223272466, "No": 0.024665531395795838}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922277275307128, "res": {"Yes": 0.9922277275307128, "No": 0.007771014164642263}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9590246965806453, "res": {"Yes": 0.9590246965806453, "No": 0.04097299859846346}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9485341462419261, "res": {"Yes": 0.9485341462419261, "No": 0.05146358545263621}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.6732592015661613, "res": {"Yes": 0.6732592015661613, "No": 0.3267376577347299}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7280238065610529, "res": {"Yes": 0.7280238065610529, "No": 0.27196584239555577}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8090436401932365, "res": {"Yes": 0.8090436401932365, "No": 0.19095316317226071}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7740474518282369, "res": {"Yes": 0.7740474518282369, "No": 0.22595000974405968}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9734144293824284, "res": {"Yes": 0.9734144293824284, "No": 0.026582842236978637}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9199854208194459, "res": {"Yes": 0.9199854208194459, "No": 0.08000777992293091}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9713299306567305, "res": {"Yes": 0.9713299306567305, "No": 0.028666152717980387}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9184175447456031, "res": {"Yes": 0.9184175447456031, "No": 0.08157236814766866}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9291660365821202, "res": {"Yes": 0.9291660365821202, "No": 0.07082565394905212}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9723036628229121, "res": {"Yes": 0.9723036628229121, "No": 0.027691986601682036}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8359686220791308, "res": {"Yes": 0.8359686220791308, "No": 0.16403028558595012}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9487374455326573, "res": {"Yes": 0.9487374455326573, "No": 0.05126160231249127}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.17403807184408654, "res": {"No": 0.8259582347468466, "Yes": 0.17403807184408654}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44663476450768685, "res": {"No": 0.5533642800845046, "Yes": 0.44663476450768685}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.18402514004995482, "res": {"No": 0.8159718599239344, "Yes": 0.18402514004995482}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8799991301291287, "res": {"Yes": 0.8799991301291287, "No": 0.11999970434437515}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8551703757619227, "res": {"Yes": 0.8551703757619227, "No": 0.14482592137950115}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961271610598952, "res": {"Yes": 0.9961271610598952, "No": 0.0038712667496840353}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761681720343675, "res": {"Yes": 0.9761681720343675, "No": 0.0238296660933169}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838234320544859, "res": {"Yes": 0.9838234320544859, "No": 0.01617612320827347}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9975683216314885, "res": {"Yes": 0.9975683216314885, "No": 0.002431177429723311}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978735343384161, "res": {"Yes": 0.9978735343384161, "No": 0.002125890511826018}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998993660000068, "res": {"Yes": 0.998993660000068, "No": 0.001006262808870721}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995590572738309, "res": {"Yes": 0.9995590572738309, "No": 0.0004406383883286516}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986661272001982, "res": {"Yes": 0.9986661272001982, "No": 0.001333132957098472}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9783535707472841, "res": {"Yes": 0.9783535707472841, "No": 0.021645999980717357}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6528461189019096, "res": {"Yes": 0.6528461189019096, "No": 0.34715278269017075}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9632428483536807, "res": {"Yes": 0.9632428483536807, "No": 0.03675604783221275}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9506374817583235, "res": {"Yes": 0.9506374817583235, "No": 0.04936150053645141}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.40115307153004764, "res": {"No": 0.5988460413682299, "Yes": 0.40115307153004764}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 1.763249590686381e-05, "res": {"No": 0.9999820452021894, "Yes": 1.763249590686381e-05}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927855135570096, "res": {"Yes": 0.9927855135570096, "No": 0.007213828283600428}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.970344903564434, "res": {"Yes": 0.970344903564434, "No": 0.02965395123066552}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968398813193833, "res": {"Yes": 0.9968398813193833, "No": 0.003157016004156199}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.991169039030489, "res": {"Yes": 0.991169039030489, "No": 0.00882879005825861}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.1988831326272524, "res": {"No": 0.8011062452085329, "Yes": 0.1988831326272524}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9674632299694026, "res": {"Yes": 0.9674632299694026, "No": 0.03253500800475587}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9771087558634411, "res": {"Yes": 0.9771087558634411, "No": 0.022889369275144126}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8292446440200599, "res": {"Yes": 0.8292446440200599, "No": 0.17075457283694004}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9571327810260707, "res": {"Yes": 0.9571327810260707, "No": 0.04286491584971861}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.10850384161918387, "res": {"No": 0.8914942994810022, "Yes": 0.10850384161918387}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9329857327988484, "res": {"Yes": 0.9329857327988484, "No": 0.06701175012282672}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977865200741128, "res": {"Yes": 0.977865200741128, "No": 0.022133393340596615}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9590858727991243, "res": {"Yes": 0.9590858727991243, "No": 0.040912972098396116}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.10245319693012975, "res": {"No": 0.8975453595573786, "Yes": 0.10245319693012975}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9882439902776525, "res": {"Yes": 0.9882439902776525, "No": 0.011755257401936826}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8732260953662223, "res": {"Yes": 0.8732260953662223, "No": 0.12677327806503547}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911850881695026, "res": {"Yes": 0.9911850881695026, "No": 0.008814261008989032}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.977221224622199, "res": {"Yes": 0.977221224622199, "No": 0.0227778068691174}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9977171068731596, "res": {"Yes": 0.9977171068731596, "No": 0.002282643677614563}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998471291759354, "res": {"Yes": 0.9998471291759354, "No": 0.00015277321364037181}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999904567213998, "res": {"Yes": 0.999904567213998, "No": 9.529568751565893e-05}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971791394251583, "res": {"Yes": 0.9971791394251583, "No": 0.0028206427344054496}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988176147689117, "res": {"Yes": 0.9988176147689117, "No": 0.001182175374526869}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8152889000214721, "res": {"Yes": 0.8152889000214721, "No": 0.1847105266811656}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995301149705649, "res": {"Yes": 0.9995301149705649, "No": 0.00046980102686152136}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99265758421153, "res": {"Yes": 0.99265758421153, "No": 0.007342293641140713}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992497375876955, "res": {"Yes": 0.9992497375876955, "No": 0.000750143002206793}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985313300528409, "res": {"Yes": 0.9985313300528409, "No": 0.0014684033866359037}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.007015621313988472, "res": {"No": 0.9929840068081998, "Yes": 0.007015621313988472}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9654948781457353, "res": {"Yes": 0.9654948781457353, "No": 0.03450424910724031}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912778585142563, "res": {"Yes": 0.9912778585142563, "No": 0.008721596264140577}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9613857939992745, "res": {"Yes": 0.9613857939992745, "No": 0.038614037608838435}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9677467188727806, "res": {"Yes": 0.9677467188727806, "No": 0.03225151371314981}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9995914559468584, "res": {"Yes": 0.9995914559468584, "No": 0.000408352616780516}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.4006403447550582, "res": {"No": 0.599357356664252, "Yes": 0.4006403447550582}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983548443636038, "res": {"Yes": 0.983548443636038, "No": 0.016450727135199156}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898752103005108, "res": {"Yes": 0.9898752103005108, "No": 0.010123751320147967}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9865168540224253, "res": {"Yes": 0.9865168540224253, "No": 0.013482267169386278}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.3937821111659498, "res": {"No": 0.6062158913523727, "Yes": 0.3937821111659498}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7562299139922931, "res": {"Yes": 0.7562299139922931, "No": 0.24376768936098678}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.738313786343349, "res": {"Yes": 0.738313786343349, "No": 0.2616839502862018}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.814896049908918, "res": {"Yes": 0.814896049908918, "No": 0.1851024543317085}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6274018591304704, "res": {"Yes": 0.6274018591304704, "No": 0.372596393552992}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.0003599608076786238, "res": {"No": 0.9996380246288984, "Yes": 0.0003599608076786238}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995627509912224, "res": {"Yes": 0.9995627509912224, "No": 0.0004364378392683229}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723995814209502, "res": {"Yes": 0.9723995814209502, "No": 0.027597171317712894}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9871087689993426, "res": {"Yes": 0.9871087689993426, "No": 0.012889541923719714}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.7158887866454403, "res": {"Yes": 0.7158887866454403, "No": 0.2841011904161527}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.643632575907201, "res": {"Yes": 0.643632575907201, "No": 0.356364106406157}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.6110504216846063, "res": {"Yes": 0.6110504216846063, "No": 0.38894847466919313}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9859100221199246, "res": {"Yes": 0.9859100221199246, "No": 0.014088541099311762}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821593302747065, "res": {"Yes": 0.9821593302747065, "No": 0.017840311343368827}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9780739669294269, "res": {"Yes": 0.9780739669294269, "No": 0.021925652075067834}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9065885048158638, "res": {"Yes": 0.9065885048158638, "No": 0.09339971361244064}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9851265395627526, "res": {"Yes": 0.9851265395627526, "No": 0.01487227258488689}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9899889921061658, "res": {"Yes": 0.9899889921061658, "No": 0.010007488662485105}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9515341140235336, "res": {"Yes": 0.9515341140235336, "No": 0.04846388003707626}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.6277387953553704, "res": {"Yes": 0.6277387953553704, "No": 0.3722579094353847}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.07758037370145718, "res": {"No": 0.922419001660887, "Yes": 0.07758037370145718}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.990787281545588, "res": {"Yes": 0.990787281545588, "No": 0.009212624567353972}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875111856801255, "res": {"Yes": 0.9875111856801255, "No": 0.012488013683970848}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9841971758916169, "res": {"Yes": 0.9841971758916169, "No": 0.01580191727171512}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960520656647615, "res": {"Yes": 0.9960520656647615, "No": 0.003947909818488233}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8240461280469669, "res": {"Yes": 0.8240461280469669, "No": 0.17595223725622236}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9452096634617, "res": {"Yes": 0.9452096634617, "No": 0.05478950598445618}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977015599677009, "res": {"Yes": 0.9977015599677009, "No": 0.0022982737070775788}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995491714737335, "res": {"Yes": 0.9995491714737335, "No": 0.00045068270619203827}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999750969483248, "res": {"Yes": 0.999750969483248, "No": 0.0002483841545490021}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.44076909709932116, "res": {"No": 0.5592157629867334, "Yes": 0.44076909709932116}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9203258655213563, "res": {"Yes": 0.9203258655213563, "No": 0.0796718464206402}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8332646334935853, "res": {"Yes": 0.8332646334935853, "No": 0.1667308072102049}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9682074507890577, "res": {"Yes": 0.9682074507890577, "No": 0.03178941201948679}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.946781619327316, "res": {"Yes": 0.946781619327316, "No": 0.05321567410470362}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9748449853588295, "res": {"Yes": 0.9748449853588295, "No": 0.025154018849337403}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973774905020355, "res": {"Yes": 0.9973774905020355, "No": 0.002621998147926383}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985813692093449, "res": {"Yes": 0.9985813692093449, "No": 0.0014181642860529383}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974351305129631, "res": {"Yes": 0.9974351305129631, "No": 0.0025647904489821106}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9862300472025848, "res": {"Yes": 0.9862300472025848, "No": 0.013768539992552871}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.24893902301430967, "res": {"No": 0.751049310189487, "Yes": 0.24893902301430967}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9716273867334337, "res": {"Yes": 0.9716273867334337, "No": 0.028372009522844844}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.936042338581787, "res": {"Yes": 0.936042338581787, "No": 0.06395290665477303}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9295375077740318, "res": {"Yes": 0.9295375077740318, "No": 0.07045722435829702}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9773539871886818, "res": {"Yes": 0.9773539871886818, "No": 0.022645047599250352}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.986381148233554, "res": {"Yes": 0.986381148233554, "No": 0.013618005881906385}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997957652387589, "res": {"Yes": 0.9997957652387589, "No": 0.0002041007119323453}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991642757621892, "res": {"Yes": 0.9991642757621892, "No": 0.0008355965141195959}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996606616882732, "res": {"Yes": 0.9996606616882732, "No": 0.00033913869651292715}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999055207534452, "res": {"Yes": 0.9999055207534452, "No": 9.420493398439137e-05}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.994831823218807, "res": {"Yes": 0.994831823218807, "No": 0.005168087404548602}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965668029446888, "res": {"Yes": 0.9965668029446888, "No": 0.003432930532253955}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933102983502146, "res": {"Yes": 0.9933102983502146, "No": 0.00668965751315989}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979395336811571, "res": {"Yes": 0.9979395336811571, "No": 0.002060249885286892}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846664486210424, "res": {"Yes": 0.9846664486210424, "No": 0.01533328075789446}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.21161554872722071, "res": {"No": 0.7883825400532334, "Yes": 0.21161554872722071}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.999782897630131, "res": {"Yes": 0.999782897630131, "No": 0.00021699787437037872}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993959178311462, "res": {"Yes": 0.9993959178311462, "No": 0.0006035307898024078}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976498359699892, "res": {"Yes": 0.9976498359699892, "No": 0.0023499282105378235}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.999487490337967, "res": {"Yes": 0.999487490337967, "No": 0.0005123272381879743}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8939156763619353, "res": {"Yes": 0.8939156763619353, "No": 0.10607167862032932}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.7851780277259145, "res": {"Yes": 0.7851780277259145, "No": 0.21481299930475867}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9614273325312405, "res": {"Yes": 0.9614273325312405, "No": 0.03856757428409172}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9528943994686742, "res": {"Yes": 0.9528943994686742, "No": 0.04710190184030089}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.916893012264135, "res": {"Yes": 0.916893012264135, "No": 0.08305644849507807}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.7694996350287013, "res": {"Yes": 0.7694996350287013, "No": 0.23049986551376173}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9765580953393436, "res": {"Yes": 0.9765580953393436, "No": 0.023441406776779663}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.840041032603768, "res": {"Yes": 0.840041032603768, "No": 0.15995871952588808}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9414592319413395, "res": {"Yes": 0.9414592319413395, "No": 0.05854040964786827}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9716267153391415, "res": {"Yes": 0.9716267153391415, "No": 0.02837285502132649}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9707479035417911, "res": {"Yes": 0.9707479035417911, "No": 0.029251494868347463}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957691210932578, "res": {"Yes": 0.9957691210932578, "No": 0.004230686525163487}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9874765414329139, "res": {"Yes": 0.9874765414329139, "No": 0.012523116782531747}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969056315573195, "res": {"Yes": 0.9969056315573195, "No": 0.003093903551719233}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9833901365516919, "res": {"Yes": 0.9833901365516919, "No": 0.01660543379723393}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.49076091726032806, "res": {"No": 0.5092371792198461, "Yes": 0.49076091726032806}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9368851699506898, "res": {"Yes": 0.9368851699506898, "No": 0.06311240096375988}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7252887192159698, "res": {"Yes": 0.7252887192159698, "No": 0.2747000510614904}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8202923072408784, "res": {"Yes": 0.8202923072408784, "No": 0.17970350152322828}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.4298608058605901, "res": {"No": 0.5701273055618109, "Yes": 0.4298608058605901}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9075308671800789, "res": {"Yes": 0.9075308671800789, "No": 0.09246745816340873}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875616389154188, "res": {"Yes": 0.9875616389154188, "No": 0.012436402768588248}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956252800870979, "res": {"Yes": 0.9956252800870979, "No": 0.004373701514111294}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960927535293647, "res": {"Yes": 0.9960927535293647, "No": 0.003906112285550901}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877493250614072, "res": {"Yes": 0.9877493250614072, "No": 0.01224690533035766}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.965080118246329, "res": {"Yes": 0.965080118246329, "No": 0.0349175503784588}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.8004267068101069, "res": {"Yes": 0.8004267068101069, "No": 0.19957132255521998}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9706508569006349, "res": {"Yes": 0.9706508569006349, "No": 0.029347702944874036}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.975351425027571, "res": {"Yes": 0.975351425027571, "No": 0.024647660295909125}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838543550947473, "res": {"Yes": 0.9838543550947473, "No": 0.016141494893729565}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.8053164445393673, "res": {"Yes": 0.8053164445393673, "No": 0.19467964841152435}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9796724577200752, "res": {"Yes": 0.9796724577200752, "No": 0.020326484257810667}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9911289956190455, "res": {"Yes": 0.9911289956190455, "No": 0.008865960481305208}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966374809719647, "res": {"Yes": 0.9966374809719647, "No": 0.0033621097509049305}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9266158968855823, "res": {"Yes": 0.9266158968855823, "No": 0.07338299837209239}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_2_ft_gpt35", "target_model": "human", "recognition_score": 0.9208486946412799, "res": {"Yes": 0.9208486946412799, "No": 0.07914865388009944}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_2_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914121133914637, "res": {"Yes": 0.9914121133914637, "No": 0.008586560762928772}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_2_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983429766816035, "res": {"Yes": 0.9983429766816035, "No": 0.0016561836987075975}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_2_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988292720373195, "res": {"Yes": 0.9988292720373195, "No": 0.0011702092500113425}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_2_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945338955125923, "res": {"Yes": 0.9945338955125923, "No": 0.005464744010933268}, "ground_truth": 0}]