[{"key": "33773576", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2697230195635238, "res": {"No": 0.7302662976927208, "Yes": 0.2697230195635238}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4073156505293794, "res": {"No": 0.5926622522507181, "Yes": 0.4073156505293794}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46937345879269726, "res": {"No": 0.5306113429052107, "Yes": 0.46937345879269726}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4177965865293151, "res": {"No": 0.5821901304374879, "Yes": 0.4177965865293151}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44779537640713846, "res": {"No": 0.5521862390636806, "Yes": 0.44779537640713846}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1647597049833571, "res": {"No": 0.8352263358194691, "Yes": 0.1647597049833571}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5492990160032971, "res": {"Yes": 0.5492990160032971, "No": 0.45067736609234066}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.559713557824781, "res": {"Yes": 0.559713557824781, "No": 0.4402687425484103}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.400933299869784, "res": {"No": 0.5990456041250647, "Yes": 0.400933299869784}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45124575796987865, "res": {"No": 0.5487333482104287, "Yes": 0.45124575796987865}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08759478629424444, "res": {"No": 0.9123984384943415, "Yes": 0.08759478629424444}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32903896519055925, "res": {"No": 0.6709450755370492, "Yes": 0.32903896519055925}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4737120648437759, "res": {"No": 0.5262691842595627, "Yes": 0.4737120648437759}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42962824102251507, "res": {"No": 0.5703549230019656, "Yes": 0.42962824102251507}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46361519126955686, "res": {"No": 0.536363728557418, "Yes": 0.46361519126955686}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21606285894499289, "res": {"No": 0.7839205053577766, "Yes": 0.21606285894499289}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39770665229146485, "res": {"No": 0.6022724266941942, "Yes": 0.39770665229146485}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1427493905224701, "res": {"No": 0.857235073383579, "Yes": 0.1427493905224701}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31488181532566684, "res": {"No": 0.6850929613004546, "Yes": 0.31488181532566684}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21004667732236518, "res": {"No": 0.789932184192561, "Yes": 0.21004667732236518}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47102924292661746, "res": {"No": 0.5289503700267599, "Yes": 0.47102924292661746}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5428121322700122, "res": {"Yes": 0.5428121322700122, "No": 0.45717297893502934}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5193713193958445, "res": {"Yes": 0.5193713193958445, "No": 0.48061120084021786}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5916079145638142, "res": {"Yes": 0.5916079145638142, "No": 0.408377086418076}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40186908604709803, "res": {"No": 0.5981143036916405, "Yes": 0.40186908604709803}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48528326839212316, "res": {"No": 0.5147018592662266, "Yes": 0.48528326839212316}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.436965165446877, "res": {"No": 0.5630147231168077, "Yes": 0.436965165446877}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4478244840525932, "res": {"No": 0.5521589065213329, "Yes": 0.4478244840525932}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41596531483395033, "res": {"No": 0.5840173252908695, "Yes": 0.41596531483395033}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35524783724633374, "res": {"No": 0.6447348706162974, "Yes": 0.35524783724633374}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4768021757535036, "res": {"No": 0.5231675881566283, "Yes": 0.4768021757535036}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45255145934439156, "res": {"No": 0.5474129091445942, "Yes": 0.45255145934439156}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3453016141383843, "res": {"No": 0.6546664367057674, "Yes": 0.3453016141383843}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28343155883605564, "res": {"No": 0.7165510536718261, "Yes": 0.28343155883605564}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42359964640211717, "res": {"No": 0.5763707858238108, "Yes": 0.42359964640211717}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06551909841413658, "res": {"No": 0.9344581279429592, "Yes": 0.06551909841413658}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3713436109287605, "res": {"No": 0.6286334956036882, "Yes": 0.3713436109287605}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37247878343728086, "res": {"No": 0.6275005572053117, "Yes": 0.37247878343728086}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41452266316115516, "res": {"No": 0.5854577260516193, "Yes": 0.41452266316115516}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36102080165367806, "res": {"No": 0.6389571913763472, "Yes": 0.36102080165367806}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3004979641214624, "res": {"No": 0.6994841109516441, "Yes": 0.3004979641214624}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27200275066453833, "res": {"No": 0.7279796386965546, "Yes": 0.27200275066453833}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3781875811588937, "res": {"No": 0.6217919672399695, "Yes": 0.3781875811588937}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4670298522073689, "res": {"No": 0.5329509581634153, "Yes": 0.4670298522073689}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36019119186344584, "res": {"No": 0.6397861878055001, "Yes": 0.36019119186344584}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3328191265772731, "res": {"No": 0.6671626097836889, "Yes": 0.3328191265772731}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2685207733149727, "res": {"No": 0.7314677112214548, "Yes": 0.2685207733149727}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5704697412999166, "res": {"Yes": 0.5704697412999166, "No": 0.4295123429584391}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3299502148479055, "res": {"No": 0.6700357297727391, "Yes": 0.3299502148479055}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31631956333946687, "res": {"No": 0.683666782601411, "Yes": 0.31631956333946687}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22761408357945762, "res": {"No": 0.7723707485515182, "Yes": 0.22761408357945762}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34989834833891054, "res": {"No": 0.6500804204346436, "Yes": 0.34989834833891054}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26095012026998166, "res": {"No": 0.7390333429792574, "Yes": 0.26095012026998166}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37193024341285985, "res": {"No": 0.6280493705993013, "Yes": 0.37193024341285985}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34267407576040476, "res": {"No": 0.6573041828816055, "Yes": 0.34267407576040476}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2380633414913175, "res": {"No": 0.7619247968223779, "Yes": 0.2380633414913175}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48985367251301604, "res": {"No": 0.5101246200876939, "Yes": 0.48985367251301604}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.548141891201157, "res": {"Yes": 0.548141891201157, "No": 0.45184398258831887}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4500523363411975, "res": {"No": 0.549926305353215, "Yes": 0.4500523363411975}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5583751671271648, "res": {"Yes": 0.5583751671271648, "No": 0.4416041219221385}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0357507296593019, "res": {"No": 0.9642415598542853, "Yes": 0.0357507296593019}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35033021425012956, "res": {"No": 0.6496511970808788, "Yes": 0.35033021425012956}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3770721238064825, "res": {"No": 0.6229070056225521, "Yes": 0.3770721238064825}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35989167341290773, "res": {"No": 0.6400863755103584, "Yes": 0.35989167341290773}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4086641538460303, "res": {"No": 0.5913128409020871, "Yes": 0.4086641538460303}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.008342060756737248, "res": {"No": 0.9916469928600872, "Yes": 0.008342060756737248}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.286819207712779, "res": {"No": 0.7131645783703663, "Yes": 0.286819207712779}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3041389225159529, "res": {"No": 0.6958305088995425, "Yes": 0.3041389225159529}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3072638973964748, "res": {"No": 0.6927112037882478, "Yes": 0.3072638973964748}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3853126100543104, "res": {"No": 0.6146670253454902, "Yes": 0.3853126100543104}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07403552747322843, "res": {"No": 0.9259540477753111, "Yes": 0.07403552747322843}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44774483659527137, "res": {"No": 0.5522250038982925, "Yes": 0.44774483659527137}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40507450736229156, "res": {"No": 0.5948959456443617, "Yes": 0.40507450736229156}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36095026508068256, "res": {"No": 0.6390188537204753, "Yes": 0.36095026508068256}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43857466851061844, "res": {"No": 0.561395096222932, "Yes": 0.43857466851061844}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2248984751869625, "res": {"No": 0.7750894941291399, "Yes": 0.2248984751869625}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31276367674242, "res": {"No": 0.6872200310120029, "Yes": 0.31276367674242}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4397789168722348, "res": {"No": 0.5601977682551089, "Yes": 0.4397789168722348}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3557213782222721, "res": {"No": 0.6442547671130822, "Yes": 0.3557213782222721}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38267504362662336, "res": {"No": 0.6173044319720555, "Yes": 0.38267504362662336}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43966166106067384, "res": {"No": 0.5603112478053224, "Yes": 0.43966166106067384}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33123102047581743, "res": {"No": 0.668742650994102, "Yes": 0.33123102047581743}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48032015876357725, "res": {"No": 0.5196580915059716, "Yes": 0.48032015876357725}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4598763407938213, "res": {"No": 0.540094043476962, "Yes": 0.4598763407938213}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35535297063509275, "res": {"No": 0.6446207948740907, "Yes": 0.35535297063509275}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5356717565487698, "res": {"Yes": 0.5356717565487698, "No": 0.46430291765005915}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5299688488065685, "res": {"Yes": 0.5299688488065685, "No": 0.47000574074573487}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5062702304384099, "res": {"Yes": 0.5062702304384099, "No": 0.4937084096178641}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3942467486884531, "res": {"No": 0.6057319000276141, "Yes": 0.3942467486884531}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39191194285500147, "res": {"No": 0.608064206375651, "Yes": 0.39191194285500147}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3043374683552477, "res": {"No": 0.6956488304006391, "Yes": 0.3043374683552477}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2545198372644297, "res": {"No": 0.745464622970929, "Yes": 0.2545198372644297}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.549591007559036, "res": {"Yes": 0.549591007559036, "No": 0.4503797334273375}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.449319236250453, "res": {"No": 0.5506592071493658, "Yes": 0.449319236250453}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3392796111228481, "res": {"No": 0.6607025304503642, "Yes": 0.3392796111228481}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2574293925933439, "res": {"No": 0.7425532156235862, "Yes": 0.2574293925933439}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3434982712786784, "res": {"No": 0.656486683189656, "Yes": 0.3434982712786784}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4207246656848955, "res": {"No": 0.5792533108111073, "Yes": 0.4207246656848955}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37137691080333, "res": {"No": 0.6286011218122969, "Yes": 0.37137691080333}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39143037147779836, "res": {"No": 0.6085496170162196, "Yes": 0.39143037147779836}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03005551822753511, "res": {"No": 0.9699343831596332, "Yes": 0.03005551822753511}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31052287774024967, "res": {"No": 0.6894562055566721, "Yes": 0.31052287774024967}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3634784022017679, "res": {"No": 0.6365025211717393, "Yes": 0.3634784022017679}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3761519104616388, "res": {"No": 0.623824843210186, "Yes": 0.3761519104616388}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4448474075513658, "res": {"No": 0.5551325527323282, "Yes": 0.4448474075513658}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17870889176436516, "res": {"No": 0.8212756720055397, "Yes": 0.17870889176436516}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3568409909234159, "res": {"No": 0.6431394141323044, "Yes": 0.3568409909234159}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36976919325603813, "res": {"No": 0.6302066484301476, "Yes": 0.36976919325603813}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37333506685883777, "res": {"No": 0.626640479958269, "Yes": 0.37333506685883777}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37434682938326963, "res": {"No": 0.6256384253762745, "Yes": 0.37434682938326963}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18332165343308773, "res": {"No": 0.8166629133663287, "Yes": 0.18332165343308773}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31426726270170574, "res": {"No": 0.6857083482369983, "Yes": 0.31426726270170574}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5283617274369958, "res": {"Yes": 0.5283617274369958, "No": 0.47161377578759983}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4311257621111712, "res": {"No": 0.5688429463016917, "Yes": 0.4311257621111712}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4162137202685506, "res": {"No": 0.5837645180424649, "Yes": 0.4162137202685506}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37447849533875865, "res": {"No": 0.625502832942935, "Yes": 0.37447849533875865}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4661512857550856, "res": {"No": 0.5338277006745682, "Yes": 0.4661512857550856}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38265634658076425, "res": {"No": 0.6173260688715806, "Yes": 0.38265634658076425}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4592651577444482, "res": {"No": 0.540712242989278, "Yes": 0.4592651577444482}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37850388195551066, "res": {"No": 0.6214828891170465, "Yes": 0.37850388195551066}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3649573797593999, "res": {"No": 0.635025451366841, "Yes": 0.3649573797593999}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36049550180176027, "res": {"No": 0.6394882703750647, "Yes": 0.36049550180176027}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4323150171993136, "res": {"No": 0.5676650471089143, "Yes": 0.4323150171993136}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42599758574247976, "res": {"No": 0.573983091737353, "Yes": 0.42599758574247976}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5052008732418853, "res": {"Yes": 0.5052008732418853, "No": 0.4947749827843501}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17304994962528797, "res": {"No": 0.8269399652528034, "Yes": 0.17304994962528797}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42150320406138353, "res": {"No": 0.578473038101873, "Yes": 0.42150320406138353}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43483170839614727, "res": {"No": 0.5651513769022785, "Yes": 0.43483170839614727}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5673518619011595, "res": {"Yes": 0.5673518619011595, "No": 0.43262819149791715}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5120965282367995, "res": {"Yes": 0.5120965282367995, "No": 0.48787761183670875}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35797884016951204, "res": {"No": 0.6419973519430382, "Yes": 0.35797884016951204}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3815153664647492, "res": {"No": 0.6184661610277459, "Yes": 0.3815153664647492}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3582408691384718, "res": {"No": 0.6417372993995997, "Yes": 0.3582408691384718}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3335564708749334, "res": {"No": 0.6664176719158136, "Yes": 0.3335564708749334}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38731777455816807, "res": {"No": 0.6126588642437568, "Yes": 0.38731777455816807}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5750140751285184, "res": {"Yes": 0.5750140751285184, "No": 0.4249587132800648}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30826095445568363, "res": {"No": 0.6917085656339368, "Yes": 0.30826095445568363}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27609258958815647, "res": {"No": 0.7238887924375134, "Yes": 0.27609258958815647}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3667928983071662, "res": {"No": 0.6331752556450431, "Yes": 0.3667928983071662}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20954388219555317, "res": {"No": 0.7904378946986947, "Yes": 0.20954388219555317}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4500668732664314, "res": {"No": 0.5499136571936463, "Yes": 0.4500668732664314}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4789696043770547, "res": {"No": 0.5210105437028765, "Yes": 0.4789696043770547}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5189050554641478, "res": {"Yes": 0.5189050554641478, "No": 0.48107355479298386}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4655391884938296, "res": {"No": 0.5344350186477009, "Yes": 0.4655391884938296}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4827079097393891, "res": {"No": 0.5172717572114682, "Yes": 0.4827079097393891}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11444216417244112, "res": {"No": 0.885542683686521, "Yes": 0.11444216417244112}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.336846345728018, "res": {"No": 0.6631332490702679, "Yes": 0.336846345728018}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.21724528894933923, "res": {"No": 0.7827334049175367, "Yes": 0.21724528894933923}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.269255913588053, "res": {"No": 0.7307208034602917, "Yes": 0.269255913588053}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2807594854010593, "res": {"No": 0.7192180851045248, "Yes": 0.2807594854010593}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19773360173406676, "res": {"No": 0.8022451776834625, "Yes": 0.19773360173406676}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2805885829869189, "res": {"No": 0.7193960992227849, "Yes": 0.2805885829869189}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4134242680093097, "res": {"No": 0.5865473975012593, "Yes": 0.4134242680093097}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37625837651653565, "res": {"No": 0.623713431804832, "Yes": 0.37625837651653565}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4008413363194164, "res": {"No": 0.5991390325719218, "Yes": 0.4008413363194164}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2813210880191093, "res": {"No": 0.7186608710899189, "Yes": 0.2813210880191093}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3880206574211281, "res": {"No": 0.6119553479485298, "Yes": 0.3880206574211281}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4206399907461582, "res": {"No": 0.579340031522619, "Yes": 0.4206399907461582}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36156017212137564, "res": {"No": 0.6384268534859929, "Yes": 0.36156017212137564}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25462571408030577, "res": {"No": 0.7453594304258958, "Yes": 0.25462571408030577}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29458988801488084, "res": {"No": 0.7053956908516648, "Yes": 0.29458988801488084}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3294733527781011, "res": {"No": 0.6704994610928173, "Yes": 0.3294733527781011}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2611508419750277, "res": {"No": 0.7388305652736785, "Yes": 0.2611508419750277}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1823996901650268, "res": {"No": 0.8175819226817829, "Yes": 0.1823996901650268}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32507885347643783, "res": {"No": 0.674897804710971, "Yes": 0.32507885347643783}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2834275341364956, "res": {"No": 0.7165490473316847, "Yes": 0.2834275341364956}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39091040049538195, "res": {"No": 0.6090653195275327, "Yes": 0.39091040049538195}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3634403116611964, "res": {"No": 0.636537739830551, "Yes": 0.3634403116611964}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42154310122775157, "res": {"No": 0.5784329166033799, "Yes": 0.42154310122775157}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3244753528370047, "res": {"No": 0.6755015277002898, "Yes": 0.3244753528370047}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11901199412301121, "res": {"No": 0.8809728330623635, "Yes": 0.11901199412301121}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3210571931230244, "res": {"No": 0.678927813523966, "Yes": 0.3210571931230244}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3586926088631765, "res": {"No": 0.6412939800344155, "Yes": 0.3586926088631765}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.436717345409319, "res": {"No": 0.5632688126781723, "Yes": 0.436717345409319}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5251084007946528, "res": {"Yes": 0.5251084007946528, "No": 0.4748757158624203}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5327328340078797, "res": {"Yes": 0.5327328340078797, "No": 0.4672458100228472}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38000754545436016, "res": {"No": 0.6199741657229086, "Yes": 0.38000754545436016}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4321414775603219, "res": {"No": 0.5678388586904175, "Yes": 0.4321414775603219}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5126396588598255, "res": {"Yes": 0.5126396588598255, "No": 0.4873379763460659}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5329509954699837, "res": {"Yes": 0.5329509954699837, "No": 0.46702186606517787}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11327678844840598, "res": {"No": 0.8867134979787659, "Yes": 0.11327678844840598}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3036091790174131, "res": {"No": 0.6963695339241887, "Yes": 0.3036091790174131}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5284721877174056, "res": {"Yes": 0.5284721877174056, "No": 0.4715099850181422}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5032646369684736, "res": {"Yes": 0.5032646369684736, "No": 0.496716916334806}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43883609303928633, "res": {"No": 0.5611471547072898, "Yes": 0.43883609303928633}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3031149719695274, "res": {"No": 0.6968517208150953, "Yes": 0.3031149719695274}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3457268038703269, "res": {"No": 0.6542423044766628, "Yes": 0.3457268038703269}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3004405144030905, "res": {"No": 0.6995198065365911, "Yes": 0.3004405144030905}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3975161407705757, "res": {"No": 0.6024468098024591, "Yes": 0.3975161407705757}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41553856629202796, "res": {"No": 0.5844238727048943, "Yes": 0.41553856629202796}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30179771618192486, "res": {"No": 0.6981815568096766, "Yes": 0.30179771618192486}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47010077141219264, "res": {"No": 0.5298738392090558, "Yes": 0.47010077141219264}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4307373936715147, "res": {"No": 0.5692233449453746, "Yes": 0.4307373936715147}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43126596113238375, "res": {"No": 0.5686980804527177, "Yes": 0.43126596113238375}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42227276972489136, "res": {"No": 0.57770270144412, "Yes": 0.42227276972489136}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4489480048775108, "res": {"No": 0.5510412878827293, "Yes": 0.4489480048775108}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4758861376418292, "res": {"No": 0.5241001693755294, "Yes": 0.4758861376418292}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4174981148628432, "res": {"No": 0.5824858999090362, "Yes": 0.4174981148628432}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5095036563478322, "res": {"Yes": 0.5095036563478322, "No": 0.4904739060826855}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40626560653090277, "res": {"No": 0.5937140932430297, "Yes": 0.40626560653090277}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3489598173295442, "res": {"No": 0.65101830427692, "Yes": 0.3489598173295442}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5561745250561966, "res": {"Yes": 0.5561745250561966, "No": 0.4437907550693343}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5124824871336334, "res": {"Yes": 0.5124824871336334, "No": 0.4874860033662475}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.541189004381096, "res": {"Yes": 0.541189004381096, "No": 0.45877669067404653}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31733991262875527, "res": {"No": 0.6826379143699254, "Yes": 0.31733991262875527}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19093582541219092, "res": {"No": 0.8090474427072814, "Yes": 0.19093582541219092}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29769306693364556, "res": {"No": 0.7022863866203356, "Yes": 0.29769306693364556}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37040269511803087, "res": {"No": 0.6295764797663215, "Yes": 0.37040269511803087}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5653395810352171, "res": {"Yes": 0.5653395810352171, "No": 0.4346389641406166}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30571040928830856, "res": {"No": 0.6942648590495991, "Yes": 0.30571040928830856}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29443626999635436, "res": {"No": 0.7055496955399778, "Yes": 0.29443626999635436}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4156764232967873, "res": {"No": 0.5843045163185803, "Yes": 0.4156764232967873}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.308528270889211, "res": {"No": 0.6914479512861028, "Yes": 0.308528270889211}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.20449580850855847, "res": {"No": 0.7954877821718429, "Yes": 0.20449580850855847}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3403209962985394, "res": {"No": 0.6596596426048495, "Yes": 0.3403209962985394}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3241244603722916, "res": {"No": 0.6758620444655193, "Yes": 0.3241244603722916}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22428491351875832, "res": {"No": 0.7757013663853592, "Yes": 0.22428491351875832}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3498650745881754, "res": {"No": 0.6501190363584991, "Yes": 0.3498650745881754}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3268597824150006, "res": {"No": 0.6731268720113458, "Yes": 0.3268597824150006}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3838972270908647, "res": {"No": 0.6160855963428217, "Yes": 0.3838972270908647}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11453753421130322, "res": {"No": 0.8854509904891225, "Yes": 0.11453753421130322}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3915919011401254, "res": {"No": 0.6083944992352817, "Yes": 0.3915919011401254}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3641203624252196, "res": {"No": 0.635860766631058, "Yes": 0.3641203624252196}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6731375007685687, "res": {"Yes": 0.6731375007685687, "No": 0.32684190367389054}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.339723611266686, "res": {"No": 0.6602611766777803, "Yes": 0.339723611266686}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11785530921494775, "res": {"No": 0.8821304953569526, "Yes": 0.11785530921494775}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3947360362998392, "res": {"No": 0.6052460193830823, "Yes": 0.3947360362998392}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5409120947158815, "res": {"Yes": 0.5409120947158815, "No": 0.45906874908312023}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4712983999639366, "res": {"No": 0.528683587154889, "Yes": 0.4712983999639366}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4145370432017606, "res": {"No": 0.5854434410573813, "Yes": 0.4145370432017606}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2898724153139655, "res": {"No": 0.7101072812565129, "Yes": 0.2898724153139655}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5260825707248075, "res": {"Yes": 0.5260825707248075, "No": 0.4738955690597875}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4471134560088218, "res": {"No": 0.5528627665561336, "Yes": 0.4471134560088218}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3816922207397287, "res": {"No": 0.6182891813394612, "Yes": 0.3816922207397287}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4220388953996335, "res": {"No": 0.5779391457627592, "Yes": 0.4220388953996335}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39912000216696425, "res": {"No": 0.6008644323649074, "Yes": 0.39912000216696425}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2871078355985281, "res": {"No": 0.7128739659794363, "Yes": 0.2871078355985281}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3213543726921718, "res": {"No": 0.6786212042957598, "Yes": 0.3213543726921718}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3806789728422327, "res": {"No": 0.6192999510989108, "Yes": 0.3806789728422327}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3355321164594799, "res": {"No": 0.6644506101013401, "Yes": 0.3355321164594799}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4283427654746459, "res": {"No": 0.571641061175499, "Yes": 0.4283427654746459}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.00818640849905718, "res": {"No": 0.9918040950783554, "Yes": 0.00818640849905718}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47667355187862626, "res": {"No": 0.5233048487130232, "Yes": 0.47667355187862626}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3134322032098683, "res": {"No": 0.6865511830738494, "Yes": 0.3134322032098683}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35742890634308633, "res": {"No": 0.6425411805663624, "Yes": 0.35742890634308633}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16320127891399877, "res": {"No": 0.8367815033023226, "Yes": 0.16320127891399877}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24456452474413284, "res": {"No": 0.7554095926617429, "Yes": 0.24456452474413284}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4253283330358167, "res": {"No": 0.5746367080207566, "Yes": 0.4253283330358167}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29196661950856584, "res": {"No": 0.708007140420206, "Yes": 0.29196661950856584}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27838088864128924, "res": {"No": 0.7215983621385134, "Yes": 0.27838088864128924}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4799949768647796, "res": {"No": 0.5199832185222577, "Yes": 0.4799949768647796}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2829347707374785, "res": {"No": 0.71705311616762, "Yes": 0.2829347707374785}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.414577810779098, "res": {"No": 0.5853960220591511, "Yes": 0.414577810779098}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4111642127000932, "res": {"No": 0.5888038045832185, "Yes": 0.4111642127000932}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2360153815549733, "res": {"No": 0.7639697231390739, "Yes": 0.2360153815549733}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25822948827082604, "res": {"No": 0.7417534119178091, "Yes": 0.25822948827082604}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2737335749046034, "res": {"No": 0.726250995555814, "Yes": 0.2737335749046034}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18974285845793395, "res": {"No": 0.810239537277282, "Yes": 0.18974285845793395}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2720614551921224, "res": {"No": 0.7279214754470754, "Yes": 0.2720614551921224}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2511735611817305, "res": {"No": 0.74880812738359, "Yes": 0.2511735611817305}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3020127330738163, "res": {"No": 0.6979717568528118, "Yes": 0.3020127330738163}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3699114967238243, "res": {"No": 0.6300645024476429, "Yes": 0.3699114967238243}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4534194970312004, "res": {"No": 0.5465581077152608, "Yes": 0.4534194970312004}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36609423721251855, "res": {"No": 0.6338850691000134, "Yes": 0.36609423721251855}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27633684624729393, "res": {"No": 0.7236425673555873, "Yes": 0.27633684624729393}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3447647770976212, "res": {"No": 0.6552201453793332, "Yes": 0.3447647770976212}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37206105940228573, "res": {"No": 0.6279108758634853, "Yes": 0.37206105940228573}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5710967751851995, "res": {"Yes": 0.5710967751851995, "No": 0.42887378963219397}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5280686419671313, "res": {"Yes": 0.5280686419671313, "No": 0.4718999906798785}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5159776440106032, "res": {"Yes": 0.5159776440106032, "No": 0.4839973923435293}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43222800526711125, "res": {"No": 0.5677586855039306, "Yes": 0.43222800526711125}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3299829475327327, "res": {"No": 0.6699999106200799, "Yes": 0.3299829475327327}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2900621989096954, "res": {"No": 0.7099259572132781, "Yes": 0.2900621989096954}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3180489109532864, "res": {"No": 0.6819326838789798, "Yes": 0.3180489109532864}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27990643286467937, "res": {"No": 0.7200808223558135, "Yes": 0.27990643286467937}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48316383980618083, "res": {"No": 0.5168200659318459, "Yes": 0.48316383980618083}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46801988249651316, "res": {"No": 0.5319610003182998, "Yes": 0.46801988249651316}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41602979445479116, "res": {"No": 0.583954097157977, "Yes": 0.41602979445479116}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4468805258821111, "res": {"No": 0.5530998517589901, "Yes": 0.4468805258821111}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6338640371423427, "res": {"Yes": 0.6338640371423427, "No": 0.36611770460524584}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23614210862229384, "res": {"No": 0.7638450763689325, "Yes": 0.23614210862229384}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4300867957853623, "res": {"No": 0.5698908999899143, "Yes": 0.4300867957853623}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5419519904344641, "res": {"Yes": 0.5419519904344641, "No": 0.4580288067554111}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48054468885275725, "res": {"No": 0.5194351540535576, "Yes": 0.48054468885275725}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5300603028213983, "res": {"Yes": 0.5300603028213983, "No": 0.46992548941749857}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2674981576391698, "res": {"No": 0.7324708383570891, "Yes": 0.2674981576391698}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3278393425147682, "res": {"No": 0.6721401524575205, "Yes": 0.3278393425147682}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47999454486949483, "res": {"No": 0.5199787882841087, "Yes": 0.47999454486949483}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3547321433780979, "res": {"No": 0.6452433993736587, "Yes": 0.3547321433780979}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26686456405697445, "res": {"No": 0.7331147580032726, "Yes": 0.26686456405697445}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19876900646646598, "res": {"No": 0.8011766254835828, "Yes": 0.19876900646646598}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.309408366549996, "res": {"No": 0.690561509809179, "Yes": 0.309408366549996}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4191200016270963, "res": {"No": 0.5808160392873419, "Yes": 0.4191200016270963}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3650996313662174, "res": {"No": 0.6348550651495598, "Yes": 0.3650996313662174}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2694343547329078, "res": {"No": 0.7305387889664003, "Yes": 0.2694343547329078}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09434354735692076, "res": {"No": 0.9056319696327679, "Yes": 0.09434354735692076}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.09626304331832372, "res": {"No": 0.9037200100140097, "Yes": 0.09626304331832372}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1290261734809177, "res": {"No": 0.870954510712826, "Yes": 0.1290261734809177}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22901982663122566, "res": {"No": 0.770959427515465, "Yes": 0.22901982663122566}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2953435866119864, "res": {"No": 0.7046259742013864, "Yes": 0.2953435866119864}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1881674539690368, "res": {"No": 0.8118231484639171, "Yes": 0.1881674539690368}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4568996026858319, "res": {"No": 0.5430838777408448, "Yes": 0.4568996026858319}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5164903624181091, "res": {"Yes": 0.5164903624181091, "No": 0.4834786124099823}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5113636489983513, "res": {"Yes": 0.5113636489983513, "No": 0.4886124839669246}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29083278469819157, "res": {"No": 0.7091545451281897, "Yes": 0.29083278469819157}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08712503571267106, "res": {"No": 0.9128610048588706, "Yes": 0.08712503571267106}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.18569495100141953, "res": {"No": 0.8142877615774894, "Yes": 0.18569495100141953}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19074876181097025, "res": {"No": 0.8092219568786562, "Yes": 0.19074876181097025}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2844207565688967, "res": {"No": 0.7155480402724128, "Yes": 0.2844207565688967}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21452641528808103, "res": {"No": 0.7854462118122092, "Yes": 0.21452641528808103}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1012729431148402, "res": {"No": 0.8987138799155978, "Yes": 0.1012729431148402}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28256397326417715, "res": {"No": 0.7174203413647262, "Yes": 0.28256397326417715}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28691132014823567, "res": {"No": 0.713068364696173, "Yes": 0.28691132014823567}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3919483374877916, "res": {"No": 0.6080348375837451, "Yes": 0.3919483374877916}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23677238504055265, "res": {"No": 0.7632050057292555, "Yes": 0.23677238504055265}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2348820983230028, "res": {"No": 0.7651019087312684, "Yes": 0.2348820983230028}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4841248746829145, "res": {"No": 0.5158517453482854, "Yes": 0.4841248746829145}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44727708475642924, "res": {"No": 0.5527044493319007, "Yes": 0.44727708475642924}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4133891532433302, "res": {"No": 0.5865881932914125, "Yes": 0.4133891532433302}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3881597489484607, "res": {"No": 0.6118140392980125, "Yes": 0.3881597489484607}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20397870048014838, "res": {"No": 0.7960077237351244, "Yes": 0.20397870048014838}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23595605474574277, "res": {"No": 0.764025395641202, "Yes": 0.23595605474574277}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29345302606548584, "res": {"No": 0.7065182822364049, "Yes": 0.29345302606548584}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.322848340526099, "res": {"No": 0.6771262086719624, "Yes": 0.322848340526099}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43131758675766335, "res": {"No": 0.5686576645163373, "Yes": 0.43131758675766335}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2693331203255357, "res": {"No": 0.7306501680944365, "Yes": 0.2693331203255357}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5153042376632175, "res": {"Yes": 0.5153042376632175, "No": 0.4846711302524075}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18320049953983508, "res": {"No": 0.8167788387272802, "Yes": 0.18320049953983508}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35519480270336, "res": {"No": 0.6447788430339157, "Yes": 0.35519480270336}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3818652810445447, "res": {"No": 0.6181084200114316, "Yes": 0.3818652810445447}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22222671044005116, "res": {"No": 0.777746665951861, "Yes": 0.22222671044005116}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28554941001754924, "res": {"No": 0.714430240777572, "Yes": 0.28554941001754924}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3006859843952308, "res": {"No": 0.6992866045279309, "Yes": 0.3006859843952308}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.15641524205887677, "res": {"No": 0.8435688720527821, "Yes": 0.15641524205887677}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2478376461045756, "res": {"No": 0.7521290956639307, "Yes": 0.2478376461045756}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3329483187484169, "res": {"No": 0.667023187368394, "Yes": 0.3329483187484169}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31034968534326973, "res": {"No": 0.6896251292285251, "Yes": 0.31034968534326973}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3444204257396794, "res": {"No": 0.6555552975450307, "Yes": 0.3444204257396794}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.26998726169611487, "res": {"No": 0.7299869074503901, "Yes": 0.26998726169611487}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3221283855546361, "res": {"No": 0.6778466476717898, "Yes": 0.3221283855546361}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10032483575096957, "res": {"No": 0.8996617406742929, "Yes": 0.10032483575096957}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33168684272671883, "res": {"No": 0.6682951244118198, "Yes": 0.33168684272671883}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2897596768734567, "res": {"No": 0.7102265040741735, "Yes": 0.2897596768734567}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3222870406293569, "res": {"No": 0.6776969278844786, "Yes": 0.3222870406293569}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2579084505143031, "res": {"No": 0.7420771837486451, "Yes": 0.2579084505143031}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06428744007643432, "res": {"No": 0.9357020741687413, "Yes": 0.06428744007643432}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.11235448064675656, "res": {"No": 0.8876335589474675, "Yes": 0.11235448064675656}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2334998369874456, "res": {"No": 0.7664829690729692, "Yes": 0.2334998369874456}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2508204335643738, "res": {"No": 0.7491643227559331, "Yes": 0.2508204335643738}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.400539416135214, "res": {"No": 0.5994441319098509, "Yes": 0.400539416135214}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4817261595954464, "res": {"No": 0.5182579280761734, "Yes": 0.4817261595954464}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3043609032425426, "res": {"No": 0.6956241213930179, "Yes": 0.3043609032425426}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5570986751929103, "res": {"Yes": 0.5570986751929103, "No": 0.44288377600782686}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5974157794541483, "res": {"Yes": 0.5974157794541483, "No": 0.4025690113702753}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37453909086135, "res": {"No": 0.6254465527301114, "Yes": 0.37453909086135}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22231579675730098, "res": {"No": 0.777662253909863, "Yes": 0.22231579675730098}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3395204154221634, "res": {"No": 0.6604555993929812, "Yes": 0.3395204154221634}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2553887461449467, "res": {"No": 0.7445875560088905, "Yes": 0.2553887461449467}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2406459049498683, "res": {"No": 0.7593263198920646, "Yes": 0.2406459049498683}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21707210489772155, "res": {"No": 0.7829049205954106, "Yes": 0.21707210489772155}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38225601443493196, "res": {"No": 0.6177268126412455, "Yes": 0.38225601443493196}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3263316446017768, "res": {"No": 0.6736523852508587, "Yes": 0.3263316446017768}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5058098706549758, "res": {"Yes": 0.5058098706549758, "No": 0.4941757281743885}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4015534213601808, "res": {"No": 0.5984303709600987, "Yes": 0.4015534213601808}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30808754480999084, "res": {"No": 0.6918998494961098, "Yes": 0.30808754480999084}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0765648947779387, "res": {"No": 0.9234225348447681, "Yes": 0.0765648947779387}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33771869274948263, "res": {"No": 0.6622687756032903, "Yes": 0.33771869274948263}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4033299570681135, "res": {"No": 0.5966563258161593, "Yes": 0.4033299570681135}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31742014629977655, "res": {"No": 0.6825683844131885, "Yes": 0.31742014629977655}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3295051814412791, "res": {"No": 0.670477220994547, "Yes": 0.3295051814412791}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40174171373639894, "res": {"No": 0.5982464417354357, "Yes": 0.40174171373639894}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31651102622849875, "res": {"No": 0.683475451047061, "Yes": 0.31651102622849875}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5225348232476941, "res": {"Yes": 0.5225348232476941, "No": 0.4774369355166954}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33596775193798645, "res": {"No": 0.6640138176555558, "Yes": 0.33596775193798645}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5270356266364629, "res": {"Yes": 0.5270356266364629, "No": 0.4729365704552142}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1777481887898372, "res": {"No": 0.8222427098922576, "Yes": 0.1777481887898372}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2688246689974725, "res": {"No": 0.7311608517534449, "Yes": 0.2688246689974725}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33929409867154997, "res": {"No": 0.6606891645733695, "Yes": 0.33929409867154997}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32224695061515324, "res": {"No": 0.6777392581565548, "Yes": 0.32224695061515324}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37210883510944776, "res": {"No": 0.6278741064792127, "Yes": 0.37210883510944776}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20928408324191344, "res": {"No": 0.7906950817863816, "Yes": 0.20928408324191344}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20427268225003822, "res": {"No": 0.7957087198047225, "Yes": 0.20427268225003822}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.16008585722731553, "res": {"No": 0.8398961968158923, "Yes": 0.16008585722731553}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29058640832989274, "res": {"No": 0.7093948970506252, "Yes": 0.29058640832989274}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.227478238525344, "res": {"No": 0.7725034223449886, "Yes": 0.227478238525344}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2592922694584866, "res": {"No": 0.7406879370305445, "Yes": 0.2592922694584866}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.250840424749567, "res": {"No": 0.7491412113930708, "Yes": 0.250840424749567}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2722862163329222, "res": {"No": 0.7276923181694396, "Yes": 0.2722862163329222}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23683850111983115, "res": {"No": 0.7631400292211569, "Yes": 0.23683850111983115}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22730614812165045, "res": {"No": 0.7726747517002246, "Yes": 0.22730614812165045}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.009317294333466583, "res": {"No": 0.9906762383485859, "Yes": 0.009317294333466583}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3468640023225933, "res": {"No": 0.6531160482183878, "Yes": 0.3468640023225933}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37935448288056894, "res": {"No": 0.6206263431473416, "Yes": 0.37935448288056894}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4336778940680671, "res": {"No": 0.5662983610522362, "Yes": 0.4336778940680671}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42066730799413865, "res": {"No": 0.5793157808564695, "Yes": 0.42066730799413865}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2383196116762533, "res": {"No": 0.7616647353276609, "Yes": 0.2383196116762533}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2953074978307718, "res": {"No": 0.7046662306332065, "Yes": 0.2953074978307718}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3171057387494658, "res": {"No": 0.6828749264313231, "Yes": 0.3171057387494658}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43198291505667824, "res": {"No": 0.5679933091476482, "Yes": 0.43198291505667824}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28525299235664153, "res": {"No": 0.7147253756997353, "Yes": 0.28525299235664153}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.331766490298966, "res": {"No": 0.6682134169622447, "Yes": 0.331766490298966}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40360385056525105, "res": {"No": 0.5963740548333648, "Yes": 0.40360385056525105}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46830485292093865, "res": {"No": 0.5316760838593759, "Yes": 0.46830485292093865}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3591705980088324, "res": {"No": 0.640811267431553, "Yes": 0.3591705980088324}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30910016377484434, "res": {"No": 0.6908783637602238, "Yes": 0.30910016377484434}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10453360294333586, "res": {"No": 0.8954549552275096, "Yes": 0.10453360294333586}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4163711370856548, "res": {"No": 0.5836089363462523, "Yes": 0.4163711370856548}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.532419231945683, "res": {"Yes": 0.532419231945683, "No": 0.46755630453377894}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5390018531246079, "res": {"Yes": 0.5390018531246079, "No": 0.4609799635247462}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3487784053957234, "res": {"No": 0.6511988436611534, "Yes": 0.3487784053957234}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14546141740634608, "res": {"No": 0.8545239830979953, "Yes": 0.14546141740634608}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4643578479342794, "res": {"No": 0.535618647327041, "Yes": 0.4643578479342794}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4836287558276949, "res": {"No": 0.5163453781019859, "Yes": 0.4836287558276949}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45067992144025076, "res": {"No": 0.5492958465571187, "Yes": 0.45067992144025076}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3808317509291395, "res": {"No": 0.619146848114138, "Yes": 0.3808317509291395}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3031696588430784, "res": {"No": 0.6968138618895504, "Yes": 0.3031696588430784}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36584358511506065, "res": {"No": 0.6341243551556243, "Yes": 0.36584358511506065}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4582412897869903, "res": {"No": 0.541727890856607, "Yes": 0.4582412897869903}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.379389498915304, "res": {"No": 0.6205829939112625, "Yes": 0.379389498915304}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.357815209652573, "res": {"No": 0.642162995785373, "Yes": 0.357815209652573}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19573280296521367, "res": {"No": 0.8042474850040012, "Yes": 0.19573280296521367}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.12527866536665175, "res": {"No": 0.8747048970709282, "Yes": 0.12527866536665175}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2680000263903044, "res": {"No": 0.7319788930995542, "Yes": 0.2680000263903044}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30697053905193217, "res": {"No": 0.6930009423366588, "Yes": 0.30697053905193217}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26818482249115455, "res": {"No": 0.7317937258642431, "Yes": 0.26818482249115455}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18025993255643183, "res": {"No": 0.819729123903561, "Yes": 0.18025993255643183}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5052766893732661, "res": {"Yes": 0.5052766893732661, "No": 0.49470106892342497}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4266187592707359, "res": {"No": 0.573360428470066, "Yes": 0.4266187592707359}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4662017493630621, "res": {"No": 0.5337806138098884, "Yes": 0.4662017493630621}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4856066038343171, "res": {"No": 0.5143742837594428, "Yes": 0.4856066038343171}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03717538240765711, "res": {"No": 0.9628121150591186, "Yes": 0.03717538240765711}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3154532769637705, "res": {"No": 0.6845281569757162, "Yes": 0.3154532769637705}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3783165499819212, "res": {"No": 0.621655312384489, "Yes": 0.3783165499819212}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4179994760195136, "res": {"No": 0.5819731013095236, "Yes": 0.4179994760195136}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.11948793201886072, "res": {"No": 0.8804947619614745, "Yes": 0.11948793201886072}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0824733143822469, "res": {"No": 0.9175070593211626, "Yes": 0.0824733143822469}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37609056508789374, "res": {"No": 0.6238896931731561, "Yes": 0.37609056508789374}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46513676423694916, "res": {"No": 0.5348394342656262, "Yes": 0.46513676423694916}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49863560524652273, "res": {"No": 0.5013334652544355, "Yes": 0.49863560524652273}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48545973490703404, "res": {"No": 0.5145208858866978, "Yes": 0.48545973490703404}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.366286340633682, "res": {"No": 0.6336892415463196, "Yes": 0.366286340633682}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3462626804198403, "res": {"No": 0.653720884978808, "Yes": 0.3462626804198403}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3081010085298792, "res": {"No": 0.69187989539219, "Yes": 0.3081010085298792}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.27629086762160615, "res": {"No": 0.7236890990686468, "Yes": 0.27629086762160615}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25218374760851703, "res": {"No": 0.747799563613304, "Yes": 0.25218374760851703}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0166145344072959, "res": {"No": 0.9833774597511502, "Yes": 0.0166145344072959}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25097345560613293, "res": {"No": 0.7490111268258701, "Yes": 0.25097345560613293}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2684760414804037, "res": {"No": 0.731500730419177, "Yes": 0.2684760414804037}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3026794361548476, "res": {"No": 0.6973012166188428, "Yes": 0.3026794361548476}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22002381246717032, "res": {"No": 0.7799598850844077, "Yes": 0.22002381246717032}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09970967156990343, "res": {"No": 0.9002783191173577, "Yes": 0.09970967156990343}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2470192278098368, "res": {"No": 0.7529628036089657, "Yes": 0.2470192278098368}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3579421134246248, "res": {"No": 0.6420367075869504, "Yes": 0.3579421134246248}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3320921461269428, "res": {"No": 0.6678864065369728, "Yes": 0.3320921461269428}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3096410362570851, "res": {"No": 0.6903351670369041, "Yes": 0.3096410362570851}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24661092172476712, "res": {"No": 0.7533674641623654, "Yes": 0.24661092172476712}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5021585361921053, "res": {"Yes": 0.5021585361921053, "No": 0.497818513052655}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3223553404893809, "res": {"No": 0.6776258547578219, "Yes": 0.3223553404893809}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4555137391267082, "res": {"No": 0.5444563527831447, "Yes": 0.4555137391267082}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25127751830859363, "res": {"No": 0.7487008683019575, "Yes": 0.25127751830859363}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0016255278259052854, "res": {"No": 0.9983684041021875, "Yes": 0.0016255278259052854}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2741867048726763, "res": {"No": 0.7257914967244701, "Yes": 0.2741867048726763}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2673967148278226, "res": {"No": 0.7325772228218647, "Yes": 0.2673967148278226}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24239988480839075, "res": {"No": 0.7575795125837602, "Yes": 0.24239988480839075}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27138149972175785, "res": {"No": 0.7285951871813268, "Yes": 0.27138149972175785}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3260834006890239, "res": {"No": 0.6739056250869703, "Yes": 0.3260834006890239}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43426863574396873, "res": {"No": 0.5657139597435424, "Yes": 0.43426863574396873}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3893426791048697, "res": {"No": 0.6106462600031144, "Yes": 0.3893426791048697}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47326976280860616, "res": {"No": 0.5267153021702363, "Yes": 0.47326976280860616}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4202361978270622, "res": {"No": 0.5797484363273648, "Yes": 0.4202361978270622}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11169149692737108, "res": {"No": 0.8882926228825505, "Yes": 0.11169149692737108}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42316165915587806, "res": {"No": 0.5768212802815186, "Yes": 0.42316165915587806}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3537354010474853, "res": {"No": 0.6462506159796192, "Yes": 0.3537354010474853}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36081435680398805, "res": {"No": 0.6391669762847223, "Yes": 0.36081435680398805}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40701401469188, "res": {"No": 0.5929727404073228, "Yes": 0.40701401469188}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5114548997549363, "res": {"Yes": 0.5114548997549363, "No": 0.48852354993104496}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5013157684954616, "res": {"Yes": 0.5013157684954616, "No": 0.4986569971728413}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40513566202576085, "res": {"No": 0.5948382613813793, "Yes": 0.40513566202576085}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5162298587478453, "res": {"Yes": 0.5162298587478453, "No": 0.48374009998250694}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5309113610590437, "res": {"Yes": 0.5309113610590437, "No": 0.4690659275246128}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2418943060714869, "res": {"No": 0.7580902553844893, "Yes": 0.2418943060714869}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3804622949576749, "res": {"No": 0.6195226294553672, "Yes": 0.3804622949576749}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3175553643327218, "res": {"No": 0.6824320618190765, "Yes": 0.3175553643327218}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33476502874138453, "res": {"No": 0.6652241150435726, "Yes": 0.33476502874138453}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3048342477796296, "res": {"No": 0.6951556024912576, "Yes": 0.3048342477796296}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3659788990908483, "res": {"No": 0.6339996605215645, "Yes": 0.3659788990908483}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5278786770464902, "res": {"Yes": 0.5278786770464902, "No": 0.4720938870196312}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6003111048869338, "res": {"Yes": 0.6003111048869338, "No": 0.39966159597705236}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5623025436732559, "res": {"Yes": 0.5623025436732559, "No": 0.43766613855538616}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3871680664469664, "res": {"No": 0.6128135188576832, "Yes": 0.3871680664469664}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33445414299751697, "res": {"No": 0.6655298091758977, "Yes": 0.33445414299751697}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5463702606537971, "res": {"Yes": 0.5463702606537971, "No": 0.45360527384217286}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46772554378238457, "res": {"No": 0.5322504126049097, "Yes": 0.46772554378238457}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.479922426307586, "res": {"No": 0.5200553659987501, "Yes": 0.479922426307586}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5091077679446572, "res": {"Yes": 0.5091077679446572, "No": 0.49086928923468487}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.501201080568429, "res": {"Yes": 0.501201080568429, "No": 0.4987759410501606}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3161214828102896, "res": {"No": 0.6838591935049795, "Yes": 0.3161214828102896}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46911506475403847, "res": {"No": 0.5308545778166867, "Yes": 0.46911506475403847}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3736075365696311, "res": {"No": 0.6263745713731425, "Yes": 0.3736075365696311}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4220849845634337, "res": {"No": 0.5778887285509136, "Yes": 0.4220849845634337}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11647548002699289, "res": {"No": 0.883510290380905, "Yes": 0.11647548002699289}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4390376967515677, "res": {"No": 0.5609308180768176, "Yes": 0.4390376967515677}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5501564096513567, "res": {"Yes": 0.5501564096513567, "No": 0.4498157610238506}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4542821173852489, "res": {"No": 0.5456899072572101, "Yes": 0.4542821173852489}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.08134240595102081, "res": {"No": 0.9186400252756479, "Yes": 0.08134240595102081}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3519410813227133, "res": {"No": 0.6480438727892857, "Yes": 0.3519410813227133}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23425294169802027, "res": {"No": 0.7657249246015009, "Yes": 0.23425294169802027}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29888553648696176, "res": {"No": 0.7010951124871327, "Yes": 0.29888553648696176}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30763563486341483, "res": {"No": 0.6923465180985026, "Yes": 0.30763563486341483}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1620965878698985, "res": {"No": 0.8378804498114504, "Yes": 0.1620965878698985}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42468220614900754, "res": {"No": 0.5752902401832325, "Yes": 0.42468220614900754}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4831476395942302, "res": {"No": 0.5168283351190541, "Yes": 0.4831476395942302}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3844967433128026, "res": {"No": 0.6154773459285332, "Yes": 0.3844967433128026}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5003099863318009, "res": {"Yes": 0.5003099863318009, "No": 0.4996672160742774}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4575203565798239, "res": {"No": 0.5424583883563416, "Yes": 0.4575203565798239}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1211084686281317, "res": {"No": 0.8788753501990734, "Yes": 0.1211084686281317}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37753184735393647, "res": {"No": 0.6224412453983864, "Yes": 0.37753184735393647}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3980076710056084, "res": {"No": 0.6019665500006917, "Yes": 0.3980076710056084}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43505102322579686, "res": {"No": 0.5649168368982127, "Yes": 0.43505102322579686}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43231944845095005, "res": {"No": 0.567649209475036, "Yes": 0.43231944845095005}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.13064807776271806, "res": {"No": 0.869340265949795, "Yes": 0.13064807776271806}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32338177105054583, "res": {"No": 0.6765949678979117, "Yes": 0.32338177105054583}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3013395430510809, "res": {"No": 0.6986404128101914, "Yes": 0.3013395430510809}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3118387973049441, "res": {"No": 0.6881399472730988, "Yes": 0.3118387973049441}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3160026434759479, "res": {"No": 0.6839738794687141, "Yes": 0.3160026434759479}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0757220408814811, "res": {"No": 0.9242461377201869, "Yes": 0.0757220408814811}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1243680704537797, "res": {"No": 0.8756083912069584, "Yes": 0.1243680704537797}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33944219892933614, "res": {"No": 0.6605339208615454, "Yes": 0.33944219892933614}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37813764856729976, "res": {"No": 0.6218225477208863, "Yes": 0.37813764856729976}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37199967439188697, "res": {"No": 0.6279641752006204, "Yes": 0.37199967439188697}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1265572505091576, "res": {"No": 0.873427405914621, "Yes": 0.1265572505091576}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1443147212559621, "res": {"No": 0.8556599982933061, "Yes": 0.1443147212559621}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1913810140546912, "res": {"No": 0.8085846139240276, "Yes": 0.1913810140546912}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2572632498141454, "res": {"No": 0.7427040880599755, "Yes": 0.2572632498141454}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.13057890485933654, "res": {"No": 0.8694023564488145, "Yes": 0.13057890485933654}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5390851191717477, "res": {"Yes": 0.5390851191717477, "No": 0.46088851417205867}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44454542312124423, "res": {"No": 0.5554279951479111, "Yes": 0.44454542312124423}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4535425445706296, "res": {"No": 0.5464301461160281, "Yes": 0.4535425445706296}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4995997355771349, "res": {"No": 0.5003721787282793, "Yes": 0.4995997355771349}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44592774357884507, "res": {"No": 0.5540469547278397, "Yes": 0.44592774357884507}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4391542899099917, "res": {"No": 0.5608272798046772, "Yes": 0.4391542899099917}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41400829877809947, "res": {"No": 0.5859738939329276, "Yes": 0.41400829877809947}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4798923841040299, "res": {"No": 0.520090283688197, "Yes": 0.4798923841040299}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5929849854208415, "res": {"Yes": 0.5929849854208415, "No": 0.4069921423464456}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4902951952428653, "res": {"No": 0.5096814786724605, "Yes": 0.4902951952428653}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.022533148213777013, "res": {"No": 0.9774470337624078, "Yes": 0.022533148213777013}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.12255103788132718, "res": {"No": 0.8774366850802714, "Yes": 0.12255103788132718}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2683509607911379, "res": {"No": 0.7316258570046285, "Yes": 0.2683509607911379}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.18412606870130546, "res": {"No": 0.8158542623746345, "Yes": 0.18412606870130546}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19379319377155602, "res": {"No": 0.806186672252877, "Yes": 0.19379319377155602}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.011902045534276392, "res": {"No": 0.9880878916806126, "Yes": 0.011902045534276392}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6235285468914741, "res": {"Yes": 0.6235285468914741, "No": 0.3764368029054756}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4605425168361769, "res": {"No": 0.5394268672534483, "Yes": 0.4605425168361769}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5760171755889496, "res": {"Yes": 0.5760171755889496, "No": 0.4239457839366233}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6244405812875146, "res": {"Yes": 0.6244405812875146, "No": 0.37553485093781336}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3263231600893163, "res": {"No": 0.6736529106999241, "Yes": 0.3263231600893163}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4599093151095713, "res": {"No": 0.5400678819551331, "Yes": 0.4599093151095713}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4174556115434372, "res": {"No": 0.5825232734033033, "Yes": 0.4174556115434372}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37136580679970266, "res": {"No": 0.6286146997431691, "Yes": 0.37136580679970266}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42113303398582175, "res": {"No": 0.5788376719752314, "Yes": 0.42113303398582175}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23343983523989467, "res": {"No": 0.7665505528557397, "Yes": 0.23343983523989467}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.323376629321263, "res": {"No": 0.6765984591369535, "Yes": 0.323376629321263}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44321316924059617, "res": {"No": 0.5567627624400396, "Yes": 0.44321316924059617}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36810795052947504, "res": {"No": 0.6318643272205627, "Yes": 0.36810795052947504}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2657613641807156, "res": {"No": 0.734220490280345, "Yes": 0.2657613641807156}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.46858413952131284, "res": {"No": 0.5314029469699951, "Yes": 0.46858413952131284}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2785237068216648, "res": {"No": 0.7214591648098957, "Yes": 0.2785237068216648}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49579301887827243, "res": {"No": 0.5041897821014548, "Yes": 0.49579301887827243}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3670132969912967, "res": {"No": 0.6329711085377832, "Yes": 0.3670132969912967}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44904001721987297, "res": {"No": 0.5509408642494835, "Yes": 0.44904001721987297}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43487138425120625, "res": {"No": 0.5651059179539037, "Yes": 0.43487138425120625}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2007796983721985, "res": {"No": 0.7992000093089164, "Yes": 0.2007796983721985}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39284160070548785, "res": {"No": 0.6071373115975688, "Yes": 0.39284160070548785}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.353639728561789, "res": {"No": 0.6463309305339084, "Yes": 0.353639728561789}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3307974580871906, "res": {"No": 0.6691794825551834, "Yes": 0.3307974580871906}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23370365449767624, "res": {"No": 0.7662745906840039, "Yes": 0.23370365449767624}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36645465123655446, "res": {"No": 0.6335163440172982, "Yes": 0.36645465123655446}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31192487669140917, "res": {"No": 0.6880529099559594, "Yes": 0.31192487669140917}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4091132608014564, "res": {"No": 0.5908583315738001, "Yes": 0.4091132608014564}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4027625285089259, "res": {"No": 0.5972038300452548, "Yes": 0.4027625285089259}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22448243845597435, "res": {"No": 0.7755068758969611, "Yes": 0.22448243845597435}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.471050416166952, "res": {"No": 0.5289297519431854, "Yes": 0.471050416166952}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44539484415020236, "res": {"No": 0.554577786385812, "Yes": 0.44539484415020236}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3688073713897956, "res": {"No": 0.6311731931583929, "Yes": 0.3688073713897956}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45093698593783227, "res": {"No": 0.5490444145089475, "Yes": 0.45093698593783227}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4088675311991235, "res": {"No": 0.5911141990733751, "Yes": 0.4088675311991235}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6421015565692352, "res": {"Yes": 0.6421015565692352, "No": 0.35788062425160966}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47278323617570966, "res": {"No": 0.5271954796978913, "Yes": 0.47278323617570966}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4149570989994648, "res": {"No": 0.5850214884685662, "Yes": 0.4149570989994648}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.508555090565479, "res": {"Yes": 0.508555090565479, "No": 0.49142342013911616}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4107503787382746, "res": {"No": 0.5892296342019564, "Yes": 0.4107503787382746}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3745123609603031, "res": {"No": 0.6254699135951174, "Yes": 0.3745123609603031}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4397534939879059, "res": {"No": 0.5602262830472083, "Yes": 0.4397534939879059}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3919151173545953, "res": {"No": 0.6080668392993649, "Yes": 0.3919151173545953}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41364068195450276, "res": {"No": 0.5863420011118737, "Yes": 0.41364068195450276}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1875133005164094, "res": {"No": 0.8124678945166555, "Yes": 0.1875133005164094}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35908842917648087, "res": {"No": 0.640890784226736, "Yes": 0.35908842917648087}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38219888289828613, "res": {"No": 0.6177711052415826, "Yes": 0.38219888289828613}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32679935162823054, "res": {"No": 0.6731783479914075, "Yes": 0.32679935162823054}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2873972712468692, "res": {"No": 0.7125834150054939, "Yes": 0.2873972712468692}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2248539272116506, "res": {"No": 0.7751340320510347, "Yes": 0.2248539272116506}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3449884295670441, "res": {"No": 0.654995273109266, "Yes": 0.3449884295670441}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41764881712508467, "res": {"No": 0.5823339724539455, "Yes": 0.41764881712508467}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4026840981439035, "res": {"No": 0.5972903114209458, "Yes": 0.4026840981439035}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32186524976988623, "res": {"No": 0.678118891649289, "Yes": 0.32186524976988623}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16208777005536512, "res": {"No": 0.8378892140867925, "Yes": 0.16208777005536512}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5113117993531643, "res": {"Yes": 0.5113117993531643, "No": 0.4886645288541932}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3362224447913389, "res": {"No": 0.6637558585661049, "Yes": 0.3362224447913389}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2540220765981009, "res": {"No": 0.7459647290425022, "Yes": 0.2540220765981009}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.412980653889844, "res": {"No": 0.5869985672251704, "Yes": 0.412980653889844}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37121213029781425, "res": {"No": 0.6287669938131479, "Yes": 0.37121213029781425}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24105578481877055, "res": {"No": 0.7589299487370189, "Yes": 0.24105578481877055}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22224033335493923, "res": {"No": 0.7777423572472668, "Yes": 0.22224033335493923}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22043248972988638, "res": {"No": 0.7795567500432067, "Yes": 0.22043248972988638}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21957750738053536, "res": {"No": 0.7804099737577077, "Yes": 0.21957750738053536}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36408205897788914, "res": {"No": 0.6358920771860664, "Yes": 0.36408205897788914}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3076045037124295, "res": {"No": 0.6923768781589607, "Yes": 0.3076045037124295}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4093993063766632, "res": {"No": 0.5905760278389913, "Yes": 0.4093993063766632}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31398078161335463, "res": {"No": 0.6859906370256853, "Yes": 0.31398078161335463}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.13758148764466055, "res": {"No": 0.8624031984986849, "Yes": 0.13758148764466055}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3567291744794125, "res": {"No": 0.6432408838099957, "Yes": 0.3567291744794125}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32561710905387403, "res": {"No": 0.6743694182008174, "Yes": 0.32561710905387403}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.552128262552399, "res": {"Yes": 0.552128262552399, "No": 0.4478380667751754}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4237167031550638, "res": {"No": 0.5762628993111972, "Yes": 0.4237167031550638}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4497760890232893, "res": {"No": 0.5501876374154041, "Yes": 0.4497760890232893}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0171170968357407, "res": {"No": 0.9828669263740101, "Yes": 0.0171170968357407}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36923255614501, "res": {"No": 0.6307424698114565, "Yes": 0.36923255614501}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5358116387052254, "res": {"Yes": 0.5358116387052254, "No": 0.4641664790731928}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3950603266682496, "res": {"No": 0.6049235213098173, "Yes": 0.3950603266682496}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35450614460528107, "res": {"No": 0.6454722238905318, "Yes": 0.35450614460528107}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5426637694246924, "res": {"Yes": 0.5426637694246924, "No": 0.45730962549717963}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44761513027457805, "res": {"No": 0.5523620279188867, "Yes": 0.44761513027457805}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5118160632263233, "res": {"Yes": 0.5118160632263233, "No": 0.4881640995916198}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4650514659764777, "res": {"No": 0.5349208965146577, "Yes": 0.4650514659764777}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43580446979325305, "res": {"No": 0.564171948907065, "Yes": 0.43580446979325305}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12893614463555025, "res": {"No": 0.8710508261878052, "Yes": 0.12893614463555025}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2735353267536494, "res": {"No": 0.7264479815359003, "Yes": 0.2735353267536494}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2803192631053144, "res": {"No": 0.7196616557425831, "Yes": 0.2803192631053144}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2977159008675495, "res": {"No": 0.7022678886405315, "Yes": 0.2977159008675495}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3998391650931663, "res": {"No": 0.6001472483250299, "Yes": 0.3998391650931663}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25287274524865505, "res": {"No": 0.7471145566384954, "Yes": 0.25287274524865505}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3986683782020223, "res": {"No": 0.6013154541717786, "Yes": 0.3986683782020223}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3345787503724662, "res": {"No": 0.6654083610682, "Yes": 0.3345787503724662}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43731457132507556, "res": {"No": 0.562670207254624, "Yes": 0.43731457132507556}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3911678606496503, "res": {"No": 0.6088100163246178, "Yes": 0.3911678606496503}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14252205775346155, "res": {"No": 0.8574629910489611, "Yes": 0.14252205775346155}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32878902345829947, "res": {"No": 0.6711838402686522, "Yes": 0.32878902345829947}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3375909916896774, "res": {"No": 0.6623845503036798, "Yes": 0.3375909916896774}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3031445271200846, "res": {"No": 0.6968385156001056, "Yes": 0.3031445271200846}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3137845735579849, "res": {"No": 0.686187386770279, "Yes": 0.3137845735579849}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.46986324251093026, "res": {"No": 0.5301174994137733, "Yes": 0.46986324251093026}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46310558756464226, "res": {"No": 0.5368718250614299, "Yes": 0.46310558756464226}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5656916710526186, "res": {"Yes": 0.5656916710526186, "No": 0.43428963748301125}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49286792078092473, "res": {"No": 0.5071146027537189, "Yes": 0.49286792078092473}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41578291245063087, "res": {"No": 0.5842000695364518, "Yes": 0.41578291245063087}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4766461439373791, "res": {"No": 0.5233336312712155, "Yes": 0.4766461439373791}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5216959667287829, "res": {"Yes": 0.5216959667287829, "No": 0.47827605130350326}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38620086856003527, "res": {"No": 0.6137828364256679, "Yes": 0.38620086856003527}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4028082245098419, "res": {"No": 0.5971679212585627, "Yes": 0.4028082245098419}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4834014070405932, "res": {"No": 0.5165724134289083, "Yes": 0.4834014070405932}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07240637724319811, "res": {"No": 0.9275841563569737, "Yes": 0.07240637724319811}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27371635760423174, "res": {"No": 0.7262649614967407, "Yes": 0.27371635760423174}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40886777651971584, "res": {"No": 0.5911156827718769, "Yes": 0.40886777651971584}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33963315491380364, "res": {"No": 0.6603460256904157, "Yes": 0.33963315491380364}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3952590129460429, "res": {"No": 0.6047315490517625, "Yes": 0.3952590129460429}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37266888550645744, "res": {"No": 0.6272989548434431, "Yes": 0.37266888550645744}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38856479911268904, "res": {"No": 0.6113996756679174, "Yes": 0.38856479911268904}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3240013164761722, "res": {"No": 0.6759661622737966, "Yes": 0.3240013164761722}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4307340640843305, "res": {"No": 0.5692384864877318, "Yes": 0.4307340640843305}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1959922032263973, "res": {"No": 0.8039811543026235, "Yes": 0.1959922032263973}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06138757658812249, "res": {"No": 0.9385860953276348, "Yes": 0.06138757658812249}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30716749311209507, "res": {"No": 0.6927977634608641, "Yes": 0.30716749311209507}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26043512216425324, "res": {"No": 0.7395413368620225, "Yes": 0.26043512216425324}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3679418756100214, "res": {"No": 0.6320263327244672, "Yes": 0.3679418756100214}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2802514340512895, "res": {"No": 0.7197249527686516, "Yes": 0.2802514340512895}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3687623058825703, "res": {"No": 0.6312164615637916, "Yes": 0.3687623058825703}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3694265649587067, "res": {"No": 0.6305565101976863, "Yes": 0.3694265649587067}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35324186030151217, "res": {"No": 0.6467426386790658, "Yes": 0.35324186030151217}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3597013410650169, "res": {"No": 0.6402751328043542, "Yes": 0.3597013410650169}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3124066420121955, "res": {"No": 0.6875728784784406, "Yes": 0.3124066420121955}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07924515529640022, "res": {"No": 0.9207467070880364, "Yes": 0.07924515529640022}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21546461122000027, "res": {"No": 0.7845238074329375, "Yes": 0.21546461122000027}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3172564949314619, "res": {"No": 0.6827306502026885, "Yes": 0.3172564949314619}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4270721800967256, "res": {"No": 0.5729176270106496, "Yes": 0.4270721800967256}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37610436786491186, "res": {"No": 0.6238816200927594, "Yes": 0.37610436786491186}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24896384347218348, "res": {"No": 0.7510224306156881, "Yes": 0.24896384347218348}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3000273925482908, "res": {"No": 0.6999516274285509, "Yes": 0.3000273925482908}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41659470094638523, "res": {"No": 0.5833857194461879, "Yes": 0.41659470094638523}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.474835975176732, "res": {"No": 0.5251345833522422, "Yes": 0.474835975176732}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.297884932379487, "res": {"No": 0.7020918942734581, "Yes": 0.297884932379487}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09497619478431996, "res": {"No": 0.9050120413605076, "Yes": 0.09497619478431996}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3539595988159417, "res": {"No": 0.6460154558637913, "Yes": 0.3539595988159417}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44994772933403515, "res": {"No": 0.5500281611365209, "Yes": 0.44994772933403515}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5784704002708335, "res": {"Yes": 0.5784704002708335, "No": 0.4215037098655319}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3014787036408577, "res": {"No": 0.6985019699719346, "Yes": 0.3014787036408577}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.047917394429222646, "res": {"No": 0.9520705193373433, "Yes": 0.047917394429222646}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5194737495193076, "res": {"Yes": 0.5194737495193076, "No": 0.4805061747394194}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44887359791234993, "res": {"No": 0.5511065570780316, "Yes": 0.44887359791234993}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6408082171771797, "res": {"Yes": 0.6408082171771797, "No": 0.35917296854260206}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4389111404275649, "res": {"No": 0.5610697385750295, "Yes": 0.4389111404275649}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11612925975774205, "res": {"No": 0.8838614229514314, "Yes": 0.11612925975774205}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27811288126242395, "res": {"No": 0.7218681749004744, "Yes": 0.27811288126242395}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.304261515202494, "res": {"No": 0.6957140574927758, "Yes": 0.304261515202494}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38221943428473865, "res": {"No": 0.617751330704997, "Yes": 0.38221943428473865}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3497279193834645, "res": {"No": 0.6502522593972831, "Yes": 0.3497279193834645}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3441644443260245, "res": {"No": 0.6558168507165332, "Yes": 0.3441644443260245}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3395154584602841, "res": {"No": 0.6604660346738901, "Yes": 0.3395154584602841}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33463600169436586, "res": {"No": 0.6653394482929538, "Yes": 0.33463600169436586}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4136927046777857, "res": {"No": 0.5862815406421145, "Yes": 0.4136927046777857}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.52392774258487, "res": {"Yes": 0.52392774258487, "No": 0.47604632883577597}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3160556099574616, "res": {"No": 0.683926734774063, "Yes": 0.3160556099574616}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4803052930847111, "res": {"No": 0.519671737906631, "Yes": 0.4803052930847111}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3648467049707989, "res": {"No": 0.6351356759491927, "Yes": 0.3648467049707989}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32914328707666995, "res": {"No": 0.6708391416733205, "Yes": 0.32914328707666995}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3752895716241762, "res": {"No": 0.6246924689636836, "Yes": 0.3752895716241762}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10954764611793855, "res": {"No": 0.890436319284831, "Yes": 0.10954764611793855}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1750884332218336, "res": {"No": 0.8248902514846391, "Yes": 0.1750884332218336}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.10634539911519597, "res": {"No": 0.8936415008442453, "Yes": 0.10634539911519597}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.16240338627904444, "res": {"No": 0.8375821839565378, "Yes": 0.16240338627904444}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.16271987742012284, "res": {"No": 0.8372671866648722, "Yes": 0.16271987742012284}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2524628171303703, "res": {"No": 0.7475270104416694, "Yes": 0.2524628171303703}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22655070408881375, "res": {"No": 0.7734379360526049, "Yes": 0.22655070408881375}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28525447567605844, "res": {"No": 0.7147305217409661, "Yes": 0.28525447567605844}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3114795166685586, "res": {"No": 0.6885083314604534, "Yes": 0.3114795166685586}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27412962514292794, "res": {"No": 0.7258563636090659, "Yes": 0.27412962514292794}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3345469334462246, "res": {"No": 0.6654326156449979, "Yes": 0.3345469334462246}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38466382806109395, "res": {"No": 0.6153162596623533, "Yes": 0.38466382806109395}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3680772147988404, "res": {"No": 0.6319007489300473, "Yes": 0.3680772147988404}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39552309462104784, "res": {"No": 0.6044515506182075, "Yes": 0.39552309462104784}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46177771502183035, "res": {"No": 0.5381918682304312, "Yes": 0.46177771502183035}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19852814509783875, "res": {"No": 0.8014429968885645, "Yes": 0.19852814509783875}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33774672456428506, "res": {"No": 0.6622289214675975, "Yes": 0.33774672456428506}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3773001328141028, "res": {"No": 0.6226729237891735, "Yes": 0.3773001328141028}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4341499185862385, "res": {"No": 0.5658333945663463, "Yes": 0.4341499185862385}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3087453660183603, "res": {"No": 0.6912367323754697, "Yes": 0.3087453660183603}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09398183124703692, "res": {"No": 0.906010313001573, "Yes": 0.09398183124703692}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46747280643439143, "res": {"No": 0.5325099692380503, "Yes": 0.46747280643439143}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4343398225500088, "res": {"No": 0.5656426842750635, "Yes": 0.4343398225500088}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40104621858523626, "res": {"No": 0.5989366775864002, "Yes": 0.40104621858523626}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31616947369379617, "res": {"No": 0.6838117900287092, "Yes": 0.31616947369379617}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.04351415463102144, "res": {"No": 0.9564781184971913, "Yes": 0.04351415463102144}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28921335807981874, "res": {"No": 0.7107639297355682, "Yes": 0.28921335807981874}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.260860942760178, "res": {"No": 0.7391218844772761, "Yes": 0.260860942760178}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.16170281560944375, "res": {"No": 0.8382877083808323, "Yes": 0.16170281560944375}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2566259247865613, "res": {"No": 0.7433525585576997, "Yes": 0.2566259247865613}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31465798245999616, "res": {"No": 0.6853193122936257, "Yes": 0.31465798245999616}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2618555665336685, "res": {"No": 0.7381218718553989, "Yes": 0.2618555665336685}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3324590780480822, "res": {"No": 0.6675221938813684, "Yes": 0.3324590780480822}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.26050758529439333, "res": {"No": 0.7394762157192244, "Yes": 0.26050758529439333}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29859719134489554, "res": {"No": 0.7013813579307703, "Yes": 0.29859719134489554}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2801551842155485, "res": {"No": 0.719823713004712, "Yes": 0.2801551842155485}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41686753479951116, "res": {"No": 0.5831145664596128, "Yes": 0.41686753479951116}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3989400037259873, "res": {"No": 0.6010277635038596, "Yes": 0.3989400037259873}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3409156817055447, "res": {"No": 0.6590601855802426, "Yes": 0.3409156817055447}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39855913411669297, "res": {"No": 0.6014179090130736, "Yes": 0.39855913411669297}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14991179367757146, "res": {"No": 0.8500796483045535, "Yes": 0.14991179367757146}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47896445069183846, "res": {"No": 0.5210188278363801, "Yes": 0.47896445069183846}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.374648041399291, "res": {"No": 0.6253349830174186, "Yes": 0.374648041399291}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3871898646227202, "res": {"No": 0.6127942768152927, "Yes": 0.3871898646227202}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34863731819676536, "res": {"No": 0.6513464741468417, "Yes": 0.34863731819676536}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38344186510683514, "res": {"No": 0.6165366682773129, "Yes": 0.38344186510683514}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38862983864579803, "res": {"No": 0.6113562433776911, "Yes": 0.38862983864579803}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3363995797974194, "res": {"No": 0.6635838884294464, "Yes": 0.3363995797974194}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3175152596228723, "res": {"No": 0.6824669213390987, "Yes": 0.3175152596228723}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3000471950096731, "res": {"No": 0.6999377755229091, "Yes": 0.3000471950096731}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08424301678583355, "res": {"No": 0.9157430690104619, "Yes": 0.08424301678583355}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42628280136337754, "res": {"No": 0.5737006123529694, "Yes": 0.42628280136337754}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3044775262422841, "res": {"No": 0.695504533018536, "Yes": 0.3044775262422841}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3007055897605413, "res": {"No": 0.6992801501423582, "Yes": 0.3007055897605413}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31843684897592484, "res": {"No": 0.6815427708138302, "Yes": 0.31843684897592484}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.314078947348192, "res": {"No": 0.6859068689640737, "Yes": 0.314078947348192}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39364121751125314, "res": {"No": 0.6063308104656935, "Yes": 0.39364121751125314}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34611599999858805, "res": {"No": 0.6538616135446682, "Yes": 0.34611599999858805}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44539569040121013, "res": {"No": 0.5545820344679259, "Yes": 0.44539569040121013}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3592704972424833, "res": {"No": 0.6407059010901947, "Yes": 0.3592704972424833}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3409831214969616, "res": {"No": 0.6589969979189141, "Yes": 0.3409831214969616}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.313917646426628, "res": {"No": 0.6860686385954203, "Yes": 0.313917646426628}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.455579884523669, "res": {"No": 0.5443983331612591, "Yes": 0.455579884523669}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43191757826249305, "res": {"No": 0.5680612281680807, "Yes": 0.43191757826249305}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3975090014447988, "res": {"No": 0.6024693417344903, "Yes": 0.3975090014447988}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2974606296692551, "res": {"No": 0.702526806347833, "Yes": 0.2974606296692551}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22191780797729938, "res": {"No": 0.7780666967560562, "Yes": 0.22191780797729938}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39057541662039197, "res": {"No": 0.6094005560354177, "Yes": 0.39057541662039197}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3051028640663203, "res": {"No": 0.6948756293544813, "Yes": 0.3051028640663203}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40243426992231085, "res": {"No": 0.597539433419135, "Yes": 0.40243426992231085}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12571743119611847, "res": {"No": 0.8742663163137969, "Yes": 0.12571743119611847}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3316723483283966, "res": {"No": 0.6683106089892406, "Yes": 0.3316723483283966}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2724048772857899, "res": {"No": 0.7275791635356076, "Yes": 0.2724048772857899}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4097677677017347, "res": {"No": 0.5902085086638729, "Yes": 0.4097677677017347}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37212999381934847, "res": {"No": 0.6278499149559428, "Yes": 0.37212999381934847}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4386170368699554, "res": {"No": 0.5613646975015244, "Yes": 0.4386170368699554}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.426457826374763, "res": {"No": 0.5735214162735554, "Yes": 0.426457826374763}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40562885001760557, "res": {"No": 0.5943515498026342, "Yes": 0.40562885001760557}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3652588860384135, "res": {"No": 0.6347166247051884, "Yes": 0.3652588860384135}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44677576702365507, "res": {"No": 0.5532075673805142, "Yes": 0.44677576702365507}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2863736077078372, "res": {"No": 0.7136051577170176, "Yes": 0.2863736077078372}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42077858921339556, "res": {"No": 0.5791927820028129, "Yes": 0.42077858921339556}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3012563848135635, "res": {"No": 0.6987127537835613, "Yes": 0.3012563848135635}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36359458846468745, "res": {"No": 0.6363635432910433, "Yes": 0.36359458846468745}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4340596682000172, "res": {"No": 0.5659110888250091, "Yes": 0.4340596682000172}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20430747285016093, "res": {"No": 0.7956697708161723, "Yes": 0.20430747285016093}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3341737204880128, "res": {"No": 0.6657904084485682, "Yes": 0.3341737204880128}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3338520660966301, "res": {"No": 0.6661168988802743, "Yes": 0.3338520660966301}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31435108895840275, "res": {"No": 0.6856270006241992, "Yes": 0.31435108895840275}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1934692294669676, "res": {"No": 0.8064971058129015, "Yes": 0.1934692294669676}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06436611815015941, "res": {"No": 0.9356166952499458, "Yes": 0.06436611815015941}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41045928045652397, "res": {"No": 0.5895132633124825, "Yes": 0.41045928045652397}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3812284032035452, "res": {"No": 0.6187570074448329, "Yes": 0.3812284032035452}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41262767745340956, "res": {"No": 0.5873509895165689, "Yes": 0.41262767745340956}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.399863659990691, "res": {"No": 0.6001199062392536, "Yes": 0.399863659990691}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15687311266330214, "res": {"No": 0.8431095220702831, "Yes": 0.15687311266330214}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32302841095974766, "res": {"No": 0.6769574763048508, "Yes": 0.32302841095974766}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27987032726370115, "res": {"No": 0.7201115992678588, "Yes": 0.27987032726370115}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2164197002235849, "res": {"No": 0.7835649918156861, "Yes": 0.2164197002235849}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23630302342804765, "res": {"No": 0.7636820586736996, "Yes": 0.23630302342804765}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3149901848022195, "res": {"No": 0.6849902119130721, "Yes": 0.3149901848022195}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2751437749327429, "res": {"No": 0.7248339897517966, "Yes": 0.2751437749327429}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.339648472714504, "res": {"No": 0.6603342716357697, "Yes": 0.339648472714504}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.10906022787938728, "res": {"No": 0.8909258286839609, "Yes": 0.10906022787938728}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2673426794122186, "res": {"No": 0.7326359558921021, "Yes": 0.2673426794122186}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36480657404049377, "res": {"No": 0.6351759702345962, "Yes": 0.36480657404049377}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1916842101163079, "res": {"No": 0.8082931474575292, "Yes": 0.1916842101163079}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27825545064435453, "res": {"No": 0.7217218526157865, "Yes": 0.27825545064435453}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2362923191435403, "res": {"No": 0.7636911694550054, "Yes": 0.2362923191435403}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21290213051856763, "res": {"No": 0.7870767445560872, "Yes": 0.21290213051856763}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20009724612117943, "res": {"No": 0.7998891447326338, "Yes": 0.20009724612117943}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1766721012119732, "res": {"No": 0.8233134566831694, "Yes": 0.1766721012119732}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.12770032128640743, "res": {"No": 0.872285561411225, "Yes": 0.12770032128640743}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24746235736376657, "res": {"No": 0.7525201013187031, "Yes": 0.24746235736376657}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.05394227422687546, "res": {"No": 0.9460490200860491, "Yes": 0.05394227422687546}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18749621883281486, "res": {"No": 0.8124977207605205, "Yes": 0.18749621883281486}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3586387372791825, "res": {"No": 0.6413481267709221, "Yes": 0.3586387372791825}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41804760725004425, "res": {"No": 0.581937980292647, "Yes": 0.41804760725004425}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3705680575581603, "res": {"No": 0.6294143909875618, "Yes": 0.3705680575581603}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4183406191254031, "res": {"No": 0.5816443153990953, "Yes": 0.4183406191254031}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2730099875593197, "res": {"No": 0.7269734660373736, "Yes": 0.2730099875593197}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28128705022678696, "res": {"No": 0.718697308119754, "Yes": 0.28128705022678696}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3428545068791891, "res": {"No": 0.6571286929493961, "Yes": 0.3428545068791891}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3965477009893255, "res": {"No": 0.6034359952455683, "Yes": 0.3965477009893255}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34966574328701694, "res": {"No": 0.6503140362962821, "Yes": 0.34966574328701694}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.593113107736366, "res": {"Yes": 0.593113107736366, "No": 0.40684954100814763}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27420995689112115, "res": {"No": 0.7257728441226913, "Yes": 0.27420995689112115}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34225318800979515, "res": {"No": 0.6577126468210273, "Yes": 0.34225318800979515}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5137046651923131, "res": {"Yes": 0.5137046651923131, "No": 0.486260057723077}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.16018933813933756, "res": {"No": 0.8397948862483402, "Yes": 0.16018933813933756}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25290645543233053, "res": {"No": 0.7470729808702748, "Yes": 0.25290645543233053}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3153325130257525, "res": {"No": 0.6846420924777615, "Yes": 0.3153325130257525}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3306865602925482, "res": {"No": 0.6692856295306749, "Yes": 0.3306865602925482}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3534180306086664, "res": {"No": 0.6465557053377233, "Yes": 0.3534180306086664}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2701031380896109, "res": {"No": 0.7298699218236265, "Yes": 0.2701031380896109}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1696298786254538, "res": {"No": 0.8303530144494682, "Yes": 0.1696298786254538}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3089450653278568, "res": {"No": 0.6910402762569405, "Yes": 0.3089450653278568}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41189418299984326, "res": {"No": 0.5880944233777331, "Yes": 0.41189418299984326}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5745340353609624, "res": {"Yes": 0.5745340353609624, "No": 0.4254454250273996}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47020911391670084, "res": {"No": 0.5297695650002012, "Yes": 0.47020911391670084}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1925975500700304, "res": {"No": 0.8073889310766802, "Yes": 0.1925975500700304}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2314537290378394, "res": {"No": 0.7685360557944697, "Yes": 0.2314537290378394}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3993434960866062, "res": {"No": 0.6006438594751693, "Yes": 0.3993434960866062}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44046497442097254, "res": {"No": 0.5595199472188422, "Yes": 0.44046497442097254}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3003598569537204, "res": {"No": 0.6996280795955433, "Yes": 0.3003598569537204}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3832083128038614, "res": {"No": 0.6167772754915503, "Yes": 0.3832083128038614}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3004942379698095, "res": {"No": 0.6994914555733682, "Yes": 0.3004942379698095}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40783526433392514, "res": {"No": 0.5921486012335869, "Yes": 0.40783526433392514}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3896026715796395, "res": {"No": 0.6103807720997081, "Yes": 0.3896026715796395}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29570235145232926, "res": {"No": 0.7042853346763198, "Yes": 0.29570235145232926}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2748678891051993, "res": {"No": 0.7251119951418034, "Yes": 0.2748678891051993}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36503851880319094, "res": {"No": 0.6349365032316864, "Yes": 0.36503851880319094}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4103257469415947, "res": {"No": 0.5896464258156097, "Yes": 0.4103257469415947}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.321109754487569, "res": {"No": 0.6788706773635033, "Yes": 0.321109754487569}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22198403813224746, "res": {"No": 0.7779916402822274, "Yes": 0.22198403813224746}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32415275767284124, "res": {"No": 0.6758307392606538, "Yes": 0.32415275767284124}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4245601657747903, "res": {"No": 0.5754200978443277, "Yes": 0.4245601657747903}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42995754832230276, "res": {"No": 0.5700185356488894, "Yes": 0.42995754832230276}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5120830295502287, "res": {"Yes": 0.5120830295502287, "No": 0.4878917409769365}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4040224230347237, "res": {"No": 0.5959538188916711, "Yes": 0.4040224230347237}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3060729520658083, "res": {"No": 0.6939171333495958, "Yes": 0.3060729520658083}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33974919341777027, "res": {"No": 0.6602378698696111, "Yes": 0.33974919341777027}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31441339951900693, "res": {"No": 0.6855668052161615, "Yes": 0.31441339951900693}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34454643777761157, "res": {"No": 0.6554374982876782, "Yes": 0.34454643777761157}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4094847978768367, "res": {"No": 0.5904988209762343, "Yes": 0.4094847978768367}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07658501867645076, "res": {"No": 0.9234027137927878, "Yes": 0.07658501867645076}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2844251366822746, "res": {"No": 0.7155549238776704, "Yes": 0.2844251366822746}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18974938572453326, "res": {"No": 0.8102349432321299, "Yes": 0.18974938572453326}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24622178110348616, "res": {"No": 0.7537655507319968, "Yes": 0.24622178110348616}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.0865962038348366, "res": {"No": 0.9133851192196306, "Yes": 0.0865962038348366}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3583371055598952, "res": {"No": 0.6416318513735637, "Yes": 0.3583371055598952}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3416867490834043, "res": {"No": 0.6582881339257641, "Yes": 0.3416867490834043}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3624071773354172, "res": {"No": 0.637567560697548, "Yes": 0.3624071773354172}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3202781634485486, "res": {"No": 0.6796992214921472, "Yes": 0.3202781634485486}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2626600154510073, "res": {"No": 0.7373216135190578, "Yes": 0.2626600154510073}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3580131361851532, "res": {"No": 0.6419645659754712, "Yes": 0.3580131361851532}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3169351183589364, "res": {"No": 0.6830427703970646, "Yes": 0.3169351183589364}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4080492504401309, "res": {"No": 0.5919295645365904, "Yes": 0.4080492504401309}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3806465594077167, "res": {"No": 0.6193346266739065, "Yes": 0.3806465594077167}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35136921984286074, "res": {"No": 0.6486131440828279, "Yes": 0.35136921984286074}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2325795876632932, "res": {"No": 0.7674054494566581, "Yes": 0.2325795876632932}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3562241896960155, "res": {"No": 0.6437616822399466, "Yes": 0.3562241896960155}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2831941993198479, "res": {"No": 0.7167912531733587, "Yes": 0.2831941993198479}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.382474471163158, "res": {"No": 0.6175024147261243, "Yes": 0.382474471163158}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3502250258816142, "res": {"No": 0.6497621799771435, "Yes": 0.3502250258816142}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2897030633639156, "res": {"No": 0.7102841626021023, "Yes": 0.2897030633639156}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36943297456521185, "res": {"No": 0.6305494101713548, "Yes": 0.36943297456521185}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39741971902593454, "res": {"No": 0.6025615265957377, "Yes": 0.39741971902593454}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33832293859867535, "res": {"No": 0.6616591531726522, "Yes": 0.33832293859867535}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4618062583845194, "res": {"No": 0.5381768797956159, "Yes": 0.4618062583845194}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15018872635885283, "res": {"No": 0.8497937296050854, "Yes": 0.15018872635885283}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38809305330785493, "res": {"No": 0.6118827682291349, "Yes": 0.38809305330785493}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2778207937364561, "res": {"No": 0.7221629882986571, "Yes": 0.2778207937364561}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30498392764778237, "res": {"No": 0.6949957281381834, "Yes": 0.30498392764778237}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2277894822541655, "res": {"No": 0.7721908843501798, "Yes": 0.2277894822541655}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.7740050197100027, "res": {"Yes": 0.7740050197100027, "No": 0.22596865138882083}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5288861963715381, "res": {"Yes": 0.5288861963715381, "No": 0.4710942730022012}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48964281630712486, "res": {"No": 0.5103331659402321, "Yes": 0.48964281630712486}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4264936375419545, "res": {"No": 0.5734831636708284, "Yes": 0.4264936375419545}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.815563168565521, "res": {"Yes": 0.815563168565521, "No": 0.18441319758557412}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10457411756367713, "res": {"No": 0.8954040052900967, "Yes": 0.10457411756367713}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34106217054595533, "res": {"No": 0.65891344239711, "Yes": 0.34106217054595533}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23324909948289335, "res": {"No": 0.7667369239476273, "Yes": 0.23324909948289335}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40996848011800235, "res": {"No": 0.5900011458297121, "Yes": 0.40996848011800235}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21018756084543283, "res": {"No": 0.7897989970569854, "Yes": 0.21018756084543283}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09987692509876082, "res": {"No": 0.9001148668216349, "Yes": 0.09987692509876082}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4231401207755835, "res": {"No": 0.5768447573853819, "Yes": 0.4231401207755835}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4340205481650931, "res": {"No": 0.5659572350965497, "Yes": 0.4340205481650931}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46981813780470977, "res": {"No": 0.53016062622651, "Yes": 0.46981813780470977}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5932334981895222, "res": {"Yes": 0.5932334981895222, "No": 0.40674510799957453}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1200040004106898, "res": {"No": 0.8799719033772425, "Yes": 0.1200040004106898}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1932097292567256, "res": {"No": 0.8067683441304447, "Yes": 0.1932097292567256}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3343894323818009, "res": {"No": 0.6655825545032807, "Yes": 0.3343894323818009}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3208282005185574, "res": {"No": 0.6791453564728414, "Yes": 0.3208282005185574}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.13297015160098152, "res": {"No": 0.8670148970967652, "Yes": 0.13297015160098152}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3050855347157995, "res": {"No": 0.6948944329434231, "Yes": 0.3050855347157995}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41448464074118835, "res": {"No": 0.5854922456567753, "Yes": 0.41448464074118835}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5455884565644097, "res": {"Yes": 0.5455884565644097, "No": 0.45439176706235296}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47628585465985707, "res": {"No": 0.5236956573387646, "Yes": 0.47628585465985707}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3998785911785186, "res": {"No": 0.6001034031687502, "Yes": 0.3998785911785186}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2965472245118027, "res": {"No": 0.7034367317630207, "Yes": 0.2965472245118027}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23779355445306788, "res": {"No": 0.7621871117203671, "Yes": 0.23779355445306788}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.21163249981155563, "res": {"No": 0.788346456610151, "Yes": 0.21163249981155563}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.406574880128545, "res": {"No": 0.5933925298088424, "Yes": 0.406574880128545}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32960190540557954, "res": {"No": 0.6703684448901407, "Yes": 0.32960190540557954}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21729156712433612, "res": {"No": 0.7826880390041046, "Yes": 0.21729156712433612}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29848574581596865, "res": {"No": 0.7014860048215007, "Yes": 0.29848574581596865}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2324452419937429, "res": {"No": 0.7675387363346963, "Yes": 0.2324452419937429}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3029826305601183, "res": {"No": 0.6970033025482885, "Yes": 0.3029826305601183}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28362265609925463, "res": {"No": 0.716357525040362, "Yes": 0.28362265609925463}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18368386941625595, "res": {"No": 0.8163031464546802, "Yes": 0.18368386941625595}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3881581303256824, "res": {"No": 0.6118231370404184, "Yes": 0.3881581303256824}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30352876359738395, "res": {"No": 0.6964480470511452, "Yes": 0.30352876359738395}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45146067882782276, "res": {"No": 0.5485184843593214, "Yes": 0.45146067882782276}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3965901497270553, "res": {"No": 0.603379757631525, "Yes": 0.3965901497270553}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4634665800628852, "res": {"No": 0.5365130461815782, "Yes": 0.4634665800628852}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.397943238779602, "res": {"No": 0.6020390311365885, "Yes": 0.397943238779602}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37033654710387626, "res": {"No": 0.6296489733755788, "Yes": 0.37033654710387626}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38860705006022767, "res": {"No": 0.6113728602662086, "Yes": 0.38860705006022767}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36759334227311335, "res": {"No": 0.6323943044078973, "Yes": 0.36759334227311335}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31224039220796324, "res": {"No": 0.6877315198400831, "Yes": 0.31224039220796324}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35091403501496543, "res": {"No": 0.6490629056068806, "Yes": 0.35091403501496543}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3148201045377178, "res": {"No": 0.6851560338616574, "Yes": 0.3148201045377178}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34890067365307503, "res": {"No": 0.651076781367177, "Yes": 0.34890067365307503}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24911067583622196, "res": {"No": 0.7508722336402899, "Yes": 0.24911067583622196}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3839925606075177, "res": {"No": 0.6159929748364077, "Yes": 0.3839925606075177}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3443958005595997, "res": {"No": 0.6555871714184449, "Yes": 0.3443958005595997}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4267517143894564, "res": {"No": 0.5732328928954564, "Yes": 0.4267517143894564}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31119896889587084, "res": {"No": 0.6887888800120642, "Yes": 0.31119896889587084}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43080320244982145, "res": {"No": 0.5691789472560013, "Yes": 0.43080320244982145}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21379732332439239, "res": {"No": 0.7861779206648188, "Yes": 0.21379732332439239}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1847898214546583, "res": {"No": 0.8151863023694971, "Yes": 0.1847898214546583}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22939405271850133, "res": {"No": 0.7705904036161886, "Yes": 0.22939405271850133}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3556993241805047, "res": {"No": 0.6442775290360913, "Yes": 0.3556993241805047}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26436715589375515, "res": {"No": 0.7356113182140221, "Yes": 0.26436715589375515}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33494715696579763, "res": {"No": 0.665040305758398, "Yes": 0.33494715696579763}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38034468561152046, "res": {"No": 0.6196418928444476, "Yes": 0.38034468561152046}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3450482902523383, "res": {"No": 0.6549314207326252, "Yes": 0.3450482902523383}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3461858186367607, "res": {"No": 0.6537964921710574, "Yes": 0.3461858186367607}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20644567009730674, "res": {"No": 0.7935410083585634, "Yes": 0.20644567009730674}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24702167331229732, "res": {"No": 0.7529615913398279, "Yes": 0.24702167331229732}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3350312392540818, "res": {"No": 0.6649472731274068, "Yes": 0.3350312392540818}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.339956367666373, "res": {"No": 0.6600195455041079, "Yes": 0.339956367666373}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40124017131621653, "res": {"No": 0.5987327743641757, "Yes": 0.40124017131621653}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26402415072524377, "res": {"No": 0.7359528314579261, "Yes": 0.26402415072524377}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2675704454070015, "res": {"No": 0.7324103534127154, "Yes": 0.2675704454070015}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32222217077744786, "res": {"No": 0.6777568931614825, "Yes": 0.32222217077744786}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.15496631628025567, "res": {"No": 0.8450160987764473, "Yes": 0.15496631628025567}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22679430900815484, "res": {"No": 0.7731893157240233, "Yes": 0.22679430900815484}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.06929017207103955, "res": {"No": 0.9306995512077008, "Yes": 0.06929017207103955}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3701106181695837, "res": {"No": 0.6298664539022455, "Yes": 0.3701106181695837}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3407037662260106, "res": {"No": 0.6592538195399222, "Yes": 0.3407037662260106}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4661745426228696, "res": {"No": 0.5337922290024177, "Yes": 0.4661745426228696}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4195599731014781, "res": {"No": 0.5803952917981059, "Yes": 0.4195599731014781}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42052071415900205, "res": {"No": 0.5794475670114544, "Yes": 0.42052071415900205}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06625831583203835, "res": {"No": 0.9337262947555682, "Yes": 0.06625831583203835}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29224791878701617, "res": {"No": 0.7077301514225912, "Yes": 0.29224791878701617}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18414901223881833, "res": {"No": 0.8158345107820364, "Yes": 0.18414901223881833}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4066321542998851, "res": {"No": 0.5933436718794465, "Yes": 0.4066321542998851}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45054735090944437, "res": {"No": 0.5494296987960191, "Yes": 0.45054735090944437}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2354012909738531, "res": {"No": 0.7645732835993466, "Yes": 0.2354012909738531}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34855619973132046, "res": {"No": 0.6514222822606914, "Yes": 0.34855619973132046}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30175812291893667, "res": {"No": 0.6982151331680807, "Yes": 0.30175812291893667}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3387115563560993, "res": {"No": 0.6612678710581052, "Yes": 0.3387115563560993}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.04613082026384707, "res": {"No": 0.9538563456041997, "Yes": 0.04613082026384707}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4829755247543223, "res": {"No": 0.5170020703170523, "Yes": 0.4829755247543223}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32806549849498606, "res": {"No": 0.6719088214706571, "Yes": 0.32806549849498606}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5630013235258532, "res": {"Yes": 0.5630013235258532, "No": 0.43696726288470505}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4083356382473348, "res": {"No": 0.5916381287515237, "Yes": 0.4083356382473348}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3436995858851756, "res": {"No": 0.656274048771956, "Yes": 0.3436995858851756}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48816235196727154, "res": {"No": 0.5118142309280969, "Yes": 0.48816235196727154}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3371293143922929, "res": {"No": 0.6628452465455491, "Yes": 0.3371293143922929}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.289108305950703, "res": {"No": 0.7108717394763363, "Yes": 0.289108305950703}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3298295411318823, "res": {"No": 0.6701485196789067, "Yes": 0.3298295411318823}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3719476464360897, "res": {"No": 0.6280296250374916, "Yes": 0.3719476464360897}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1494909983006499, "res": {"No": 0.8504935167875709, "Yes": 0.1494909983006499}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3588129345416775, "res": {"No": 0.6411507694298381, "Yes": 0.3588129345416775}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.261582175026112, "res": {"No": 0.7383899694900449, "Yes": 0.261582175026112}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22823880678194894, "res": {"No": 0.7717391613103101, "Yes": 0.22823880678194894}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.056557667366296714, "res": {"No": 0.9434272911944457, "Yes": 0.056557667366296714}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18076064973197187, "res": {"No": 0.8192231957280838, "Yes": 0.18076064973197187}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3237262913504167, "res": {"No": 0.6762521970190716, "Yes": 0.3237262913504167}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22003307566466415, "res": {"No": 0.7799525768945234, "Yes": 0.22003307566466415}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.378148236569691, "res": {"No": 0.6218352081568408, "Yes": 0.378148236569691}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2845348890698809, "res": {"No": 0.7154516195154823, "Yes": 0.2845348890698809}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07928286118440914, "res": {"No": 0.9207069961389325, "Yes": 0.07928286118440914}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3643249827172883, "res": {"No": 0.6356504014677712, "Yes": 0.3643249827172883}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3758243270740034, "res": {"No": 0.6241607073133619, "Yes": 0.3758243270740034}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3091548793460177, "res": {"No": 0.6908227502904133, "Yes": 0.3091548793460177}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27970136459647893, "res": {"No": 0.720283798536846, "Yes": 0.27970136459647893}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47707923060018814, "res": {"No": 0.5228967777893004, "Yes": 0.47707923060018814}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43713679536467676, "res": {"No": 0.5628331463739904, "Yes": 0.43713679536467676}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45583947953701764, "res": {"No": 0.5441311916932801, "Yes": 0.45583947953701764}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5336648813697545, "res": {"Yes": 0.5336648813697545, "No": 0.4663096409304376}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4601519122480727, "res": {"No": 0.5398253055525033, "Yes": 0.4601519122480727}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5883949340477648, "res": {"Yes": 0.5883949340477648, "No": 0.41157913859183953}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20907925256906693, "res": {"No": 0.7908996531562319, "Yes": 0.20907925256906693}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.21690762658736398, "res": {"No": 0.7830565761360874, "Yes": 0.21690762658736398}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23255374950643998, "res": {"No": 0.767406508476909, "Yes": 0.23255374950643998}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3335915962206335, "res": {"No": 0.666367006106303, "Yes": 0.3335915962206335}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5080271438782181, "res": {"Yes": 0.5080271438782181, "No": 0.4919274168994925}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47464788717348, "res": {"No": 0.5253161747729904, "Yes": 0.47464788717348}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34199279821230805, "res": {"No": 0.6579609802151812, "Yes": 0.34199279821230805}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41995782055410846, "res": {"No": 0.580005669783882, "Yes": 0.41995782055410846}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31338816907878747, "res": {"No": 0.6865843992235786, "Yes": 0.31338816907878747}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2549683650874982, "res": {"No": 0.7450155413110202, "Yes": 0.2549683650874982}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.310484747872085, "res": {"No": 0.6895016766928228, "Yes": 0.310484747872085}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48910983660266516, "res": {"No": 0.5108687863150736, "Yes": 0.48910983660266516}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35729275186884396, "res": {"No": 0.6426885514329362, "Yes": 0.35729275186884396}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31642412423288646, "res": {"No": 0.683560166244156, "Yes": 0.31642412423288646}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2469556532429963, "res": {"No": 0.753021755374497, "Yes": 0.2469556532429963}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40302171303019957, "res": {"No": 0.5969524601498815, "Yes": 0.40302171303019957}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4429490441698995, "res": {"No": 0.5570234942290319, "Yes": 0.4429490441698995}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37919172541588353, "res": {"No": 0.6207849830998592, "Yes": 0.37919172541588353}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30565807614608315, "res": {"No": 0.6943220896606579, "Yes": 0.30565807614608315}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1768214168651286, "res": {"No": 0.8231689119963943, "Yes": 0.1768214168651286}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4335913536064751, "res": {"No": 0.56638625737845, "Yes": 0.4335913536064751}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2761020597263912, "res": {"No": 0.723883254709433, "Yes": 0.2761020597263912}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4163117501422384, "res": {"No": 0.5836681814963176, "Yes": 0.4163117501422384}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3067237419101477, "res": {"No": 0.6932591263401642, "Yes": 0.3067237419101477}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17075961017113903, "res": {"No": 0.8292184817642528, "Yes": 0.17075961017113903}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46291769744269273, "res": {"No": 0.5370574537354212, "Yes": 0.46291769744269273}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3417991141583089, "res": {"No": 0.65817936079926, "Yes": 0.3417991141583089}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2447245467292528, "res": {"No": 0.7552596965020176, "Yes": 0.2447245467292528}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3126532906488217, "res": {"No": 0.6873314315350412, "Yes": 0.3126532906488217}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23066413212277256, "res": {"No": 0.7693227935559869, "Yes": 0.23066413212277256}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3502559871423698, "res": {"No": 0.6497254694510569, "Yes": 0.3502559871423698}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47808659586557734, "res": {"No": 0.5218833413915425, "Yes": 0.47808659586557734}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4344202045712888, "res": {"No": 0.5655575049009796, "Yes": 0.4344202045712888}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3869422739492828, "res": {"No": 0.6130369914032282, "Yes": 0.3869422739492828}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.027743968881903308, "res": {"No": 0.972235370612086, "Yes": 0.027743968881903308}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4243257459362302, "res": {"No": 0.5756444978844222, "Yes": 0.4243257459362302}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40399886921406125, "res": {"No": 0.5959747968353057, "Yes": 0.40399886921406125}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3392076914690427, "res": {"No": 0.660771709626769, "Yes": 0.3392076914690427}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4234547154022267, "res": {"No": 0.5765165684289791, "Yes": 0.4234547154022267}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4595504729590018, "res": {"No": 0.5404262494305491, "Yes": 0.4595504729590018}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.264780764281325, "res": {"No": 0.7352012189189835, "Yes": 0.264780764281325}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42465895118812547, "res": {"No": 0.5753212836821389, "Yes": 0.42465895118812547}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4192286725733808, "res": {"No": 0.5807461538980733, "Yes": 0.4192286725733808}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3256676487509782, "res": {"No": 0.6743185187181023, "Yes": 0.3256676487509782}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1556134237612159, "res": {"No": 0.8443712849841896, "Yes": 0.1556134237612159}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37547000166895195, "res": {"No": 0.6245042275149161, "Yes": 0.37547000166895195}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4106530013680192, "res": {"No": 0.5893158212309586, "Yes": 0.4106530013680192}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.413969015394458, "res": {"No": 0.5860054729169597, "Yes": 0.413969015394458}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42689242481986855, "res": {"No": 0.5730796882199026, "Yes": 0.42689242481986855}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16010324349546767, "res": {"No": 0.8398800457673204, "Yes": 0.16010324349546767}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40290724694078456, "res": {"No": 0.597067563679822, "Yes": 0.40290724694078456}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34713802901042423, "res": {"No": 0.6528458642919728, "Yes": 0.34713802901042423}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3186014596712155, "res": {"No": 0.681380235431491, "Yes": 0.3186014596712155}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25800866741320005, "res": {"No": 0.7419747990025175, "Yes": 0.25800866741320005}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0636660898240435, "res": {"No": 0.9363160829981467, "Yes": 0.0636660898240435}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5268960702716324, "res": {"Yes": 0.5268960702716324, "No": 0.4730747354054527}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4353923937075483, "res": {"No": 0.5645831543729989, "Yes": 0.4353923937075483}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5552129972672849, "res": {"Yes": 0.5552129972672849, "No": 0.4447625387627449}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4539287325613687, "res": {"No": 0.5460480518818582, "Yes": 0.4539287325613687}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4183787735296817, "res": {"No": 0.5815941856359244, "Yes": 0.4183787735296817}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4601203469095959, "res": {"No": 0.5398531828510676, "Yes": 0.4601203469095959}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5081710883568515, "res": {"Yes": 0.5081710883568515, "No": 0.491805040582413}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40206605015152574, "res": {"No": 0.597906596724058, "Yes": 0.40206605015152574}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2966813831591404, "res": {"No": 0.7033041042373955, "Yes": 0.2966813831591404}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17230620901960575, "res": {"No": 0.8276765756689108, "Yes": 0.17230620901960575}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5100609503063281, "res": {"Yes": 0.5100609503063281, "No": 0.4899233444565753}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4855374777987032, "res": {"No": 0.514446275477958, "Yes": 0.4855374777987032}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4012027774748299, "res": {"No": 0.5987763337579645, "Yes": 0.4012027774748299}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3912277921565381, "res": {"No": 0.6087500027913242, "Yes": 0.3912277921565381}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4512344634299084, "res": {"No": 0.5487384514542972, "Yes": 0.4512344634299084}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.397183356402089, "res": {"No": 0.6027903807328727, "Yes": 0.397183356402089}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45336556145598517, "res": {"No": 0.5466129575734155, "Yes": 0.45336556145598517}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5227983778754428, "res": {"Yes": 0.5227983778754428, "No": 0.4771750854459126}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5135210794091926, "res": {"Yes": 0.5135210794091926, "No": 0.48646189752589436}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1937367887829935, "res": {"No": 0.8062488155165449, "Yes": 0.1937367887829935}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26465994591335584, "res": {"No": 0.7353257504950906, "Yes": 0.26465994591335584}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2997970000104392, "res": {"No": 0.7001869557186974, "Yes": 0.2997970000104392}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35618297694147144, "res": {"No": 0.6438001352736152, "Yes": 0.35618297694147144}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1594350100967769, "res": {"No": 0.8405501845033252, "Yes": 0.1594350100967769}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20742623427198587, "res": {"No": 0.7925538230629592, "Yes": 0.20742623427198587}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17403406901446686, "res": {"No": 0.8259502642883388, "Yes": 0.17403406901446686}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34605792660628215, "res": {"No": 0.6539277746097585, "Yes": 0.34605792660628215}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45787418859272017, "res": {"No": 0.5421070433970157, "Yes": 0.45787418859272017}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38576802564641344, "res": {"No": 0.6142150302570017, "Yes": 0.38576802564641344}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.00928578354014652, "res": {"No": 0.9907061859532357, "Yes": 0.00928578354014652}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21502022188463984, "res": {"No": 0.7849631853050762, "Yes": 0.21502022188463984}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24201915574186458, "res": {"No": 0.7579566082735697, "Yes": 0.24201915574186458}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4001179101247445, "res": {"No": 0.599861574230129, "Yes": 0.4001179101247445}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29957493271952385, "res": {"No": 0.70039600673012, "Yes": 0.29957493271952385}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3504920341043988, "res": {"No": 0.6494827968378603, "Yes": 0.3504920341043988}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4603819076181805, "res": {"No": 0.5395937162049387, "Yes": 0.4603819076181805}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4400065569595813, "res": {"No": 0.5599705645152888, "Yes": 0.4400065569595813}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.461148067462933, "res": {"No": 0.5388313776330867, "Yes": 0.461148067462933}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4637263795233621, "res": {"No": 0.5362499296117815, "Yes": 0.4637263795233621}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2012319210521414, "res": {"No": 0.7987405534715618, "Yes": 0.2012319210521414}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3129846597582686, "res": {"No": 0.6869806521264248, "Yes": 0.3129846597582686}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26792279991156887, "res": {"No": 0.73205273153448, "Yes": 0.26792279991156887}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2322553653361548, "res": {"No": 0.7677182078706564, "Yes": 0.2322553653361548}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2422329045874121, "res": {"No": 0.7577466909464864, "Yes": 0.2422329045874121}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40779746383516524, "res": {"No": 0.5921772974501065, "Yes": 0.40779746383516524}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.364274345064089, "res": {"No": 0.6356881729368057, "Yes": 0.364274345064089}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46322024354380226, "res": {"No": 0.5367497542883839, "Yes": 0.46322024354380226}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37936791226696165, "res": {"No": 0.6206080535585075, "Yes": 0.37936791226696165}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18336552755408345, "res": {"No": 0.8166169447047354, "Yes": 0.18336552755408345}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.365746320237717, "res": {"No": 0.6342328250614119, "Yes": 0.365746320237717}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1789105712391453, "res": {"No": 0.8210758306756798, "Yes": 0.1789105712391453}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31135768971254413, "res": {"No": 0.6886251123762109, "Yes": 0.31135768971254413}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2609358727823749, "res": {"No": 0.7390437338611073, "Yes": 0.2609358727823749}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2559937625086492, "res": {"No": 0.7439893044437687, "Yes": 0.2559937625086492}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1679402361618814, "res": {"No": 0.8320469745159274, "Yes": 0.1679402361618814}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34157349869837156, "res": {"No": 0.6584089211297708, "Yes": 0.34157349869837156}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47231440635212574, "res": {"No": 0.5276610986356104, "Yes": 0.47231440635212574}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37403167224729317, "res": {"No": 0.6259540081437364, "Yes": 0.37403167224729317}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2798728461079813, "res": {"No": 0.7201125930225515, "Yes": 0.2798728461079813}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37006208134488255, "res": {"No": 0.6299160137444971, "Yes": 0.37006208134488255}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3068336914010201, "res": {"No": 0.693141289237126, "Yes": 0.3068336914010201}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3970849783266526, "res": {"No": 0.6028951608345988, "Yes": 0.3970849783266526}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4994372921578568, "res": {"No": 0.5005444865305015, "Yes": 0.4994372921578568}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4210423316992882, "res": {"No": 0.5789382248116145, "Yes": 0.4210423316992882}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5430537645747192, "res": {"Yes": 0.5430537645747192, "No": 0.45691262451006853}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.334320555253318, "res": {"No": 0.6656612975770395, "Yes": 0.334320555253318}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3818640590776005, "res": {"No": 0.6181135194269317, "Yes": 0.3818640590776005}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30904354181157095, "res": {"No": 0.6909362825210887, "Yes": 0.30904354181157095}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25305401880045664, "res": {"No": 0.7469274353501871, "Yes": 0.25305401880045664}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41339102590043597, "res": {"No": 0.5865830196063638, "Yes": 0.41339102590043597}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34257587944249634, "res": {"No": 0.6574054418166997, "Yes": 0.34257587944249634}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28269098605139775, "res": {"No": 0.7172857372408263, "Yes": 0.28269098605139775}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3672070210177079, "res": {"No": 0.6327667741444674, "Yes": 0.3672070210177079}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45156351522259663, "res": {"No": 0.5484111497949619, "Yes": 0.45156351522259663}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3840919123335385, "res": {"No": 0.6158832885742035, "Yes": 0.3840919123335385}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41558537934821665, "res": {"No": 0.5843971943640431, "Yes": 0.41558537934821665}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33185770544409643, "res": {"No": 0.6681255994439079, "Yes": 0.33185770544409643}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2811288707531821, "res": {"No": 0.7188578183367316, "Yes": 0.2811288707531821}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44271703508908383, "res": {"No": 0.5572629320926347, "Yes": 0.44271703508908383}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3486051404573213, "res": {"No": 0.6513795373330059, "Yes": 0.3486051404573213}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3323829203974934, "res": {"No": 0.6675907519310755, "Yes": 0.3323829203974934}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3195935649475544, "res": {"No": 0.6803868265360065, "Yes": 0.3195935649475544}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.298176704527449, "res": {"No": 0.7018071344178787, "Yes": 0.298176704527449}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38539611634448895, "res": {"No": 0.6145810271518779, "Yes": 0.38539611634448895}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.27753503541978297, "res": {"No": 0.7224460377645566, "Yes": 0.27753503541978297}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.15383882739008511, "res": {"No": 0.8461475378154253, "Yes": 0.15383882739008511}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3427464561683074, "res": {"No": 0.6572311472987359, "Yes": 0.3427464561683074}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3113326886938645, "res": {"No": 0.6886380448772571, "Yes": 0.3113326886938645}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30395752735033815, "res": {"No": 0.6960245543719312, "Yes": 0.30395752735033815}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.289207226821619, "res": {"No": 0.710775920424204, "Yes": 0.289207226821619}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.233235198250825, "res": {"No": 0.766752619213101, "Yes": 0.233235198250825}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4455780281701702, "res": {"No": 0.5544060047979316, "Yes": 0.4455780281701702}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36703730044582433, "res": {"No": 0.6329401190309384, "Yes": 0.36703730044582433}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3832661049748524, "res": {"No": 0.6167173461554046, "Yes": 0.3832661049748524}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30374975223954237, "res": {"No": 0.6962336785182298, "Yes": 0.30374975223954237}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23849486482920104, "res": {"No": 0.7614823384664459, "Yes": 0.23849486482920104}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24838540169341014, "res": {"No": 0.7515934396450599, "Yes": 0.24838540169341014}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33779343816646346, "res": {"No": 0.6621854012129699, "Yes": 0.33779343816646346}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22828144577341364, "res": {"No": 0.7716948647537741, "Yes": 0.22828144577341364}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06510944863349405, "res": {"No": 0.934887105279549, "Yes": 0.06510944863349405}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5089797230815859, "res": {"Yes": 0.5089797230815859, "No": 0.491008912314004}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49821692720751026, "res": {"No": 0.5017779087662835, "Yes": 0.49821692720751026}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5878269365506037, "res": {"Yes": 0.5878269365506037, "No": 0.41216253706124506}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5422433137056754, "res": {"Yes": 0.5422433137056754, "No": 0.4577447612769234}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2567305468340291, "res": {"No": 0.743255683731728, "Yes": 0.2567305468340291}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5344419823813625, "res": {"Yes": 0.5344419823813625, "No": 0.46553639526707824}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5662466603720441, "res": {"Yes": 0.5662466603720441, "No": 0.4337292879435412}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5780181554441736, "res": {"Yes": 0.5780181554441736, "No": 0.4219589392650057}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47923135612692475, "res": {"No": 0.5207387669401352, "Yes": 0.47923135612692475}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.163242459791418, "res": {"No": 0.8367420332497203, "Yes": 0.163242459791418}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5294567489981139, "res": {"Yes": 0.5294567489981139, "No": 0.47051445396164004}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46736791730562793, "res": {"No": 0.5326139572866665, "Yes": 0.46736791730562793}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48934035946459337, "res": {"No": 0.5106379462275831, "Yes": 0.48934035946459337}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3935194044059839, "res": {"No": 0.6064600636526133, "Yes": 0.3935194044059839}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26601765505919495, "res": {"No": 0.7339646810396087, "Yes": 0.26601765505919495}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41461131001956003, "res": {"No": 0.5853707920343112, "Yes": 0.41461131001956003}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5232380269505028, "res": {"Yes": 0.5232380269505028, "No": 0.4767363339195153}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5148194614861847, "res": {"Yes": 0.5148194614861847, "No": 0.48515905177582}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32907193654670863, "res": {"No": 0.6709124415622397, "Yes": 0.32907193654670863}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03812243595330482, "res": {"No": 0.9618613476173016, "Yes": 0.03812243595330482}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26366466277665856, "res": {"No": 0.7363090231227645, "Yes": 0.26366466277665856}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.133651114164131, "res": {"No": 0.8663289176511727, "Yes": 0.133651114164131}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3484392438969773, "res": {"No": 0.651524250856165, "Yes": 0.3484392438969773}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3118231745725519, "res": {"No": 0.6881527605582083, "Yes": 0.3118231745725519}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2664325934538626, "res": {"No": 0.73354509799257, "Yes": 0.2664325934538626}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.12585007014891175, "res": {"No": 0.8741326686956431, "Yes": 0.12585007014891175}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33273004282148105, "res": {"No": 0.6672486726392295, "Yes": 0.33273004282148105}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4441256993379607, "res": {"No": 0.5558474238093718, "Yes": 0.4441256993379607}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2610596379156422, "res": {"No": 0.7389106736261608, "Yes": 0.2610596379156422}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34006588524925824, "res": {"No": 0.659914868105656, "Yes": 0.34006588524925824}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2952394963446931, "res": {"No": 0.7047404711311532, "Yes": 0.2952394963446931}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47950397251027527, "res": {"No": 0.5204767814954444, "Yes": 0.47950397251027527}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5288882061429029, "res": {"Yes": 0.5288882061429029, "No": 0.4710825900091004}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19434938229524856, "res": {"No": 0.8056326255092858, "Yes": 0.19434938229524856}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10222612578266949, "res": {"No": 0.8977572681020503, "Yes": 0.10222612578266949}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29362691914017025, "res": {"No": 0.7063602093914212, "Yes": 0.29362691914017025}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3590831865236863, "res": {"No": 0.6408973533909411, "Yes": 0.3590831865236863}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3147415667196359, "res": {"No": 0.6852359619747239, "Yes": 0.3147415667196359}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29736019417410364, "res": {"No": 0.702624590833405, "Yes": 0.29736019417410364}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4962293648557295, "res": {"No": 0.5037459023737425, "Yes": 0.4962293648557295}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3013582568177608, "res": {"No": 0.6986278653410539, "Yes": 0.3013582568177608}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4520259451603654, "res": {"No": 0.5479622013783496, "Yes": 0.4520259451603654}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47293910067263445, "res": {"No": 0.5270434795258037, "Yes": 0.47293910067263445}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48634613337076404, "res": {"No": 0.5136388432938144, "Yes": 0.48634613337076404}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.019035416664486222, "res": {"No": 0.9809506318451062, "Yes": 0.019035416664486222}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.299775984977324, "res": {"No": 0.7001979487401971, "Yes": 0.299775984977324}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5048644003104057, "res": {"Yes": 0.5048644003104057, "No": 0.4950956513650654}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3682339165314443, "res": {"No": 0.6317310368640766, "Yes": 0.3682339165314443}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2963032961788295, "res": {"No": 0.7036723381034773, "Yes": 0.2963032961788295}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11572109921494336, "res": {"No": 0.8842694244416183, "Yes": 0.11572109921494336}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31357716777385686, "res": {"No": 0.6864089993178691, "Yes": 0.31357716777385686}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3532072442954146, "res": {"No": 0.6467705397583203, "Yes": 0.3532072442954146}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4546332813766648, "res": {"No": 0.5453372872802137, "Yes": 0.4546332813766648}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3350341540385427, "res": {"No": 0.6649492347247559, "Yes": 0.3350341540385427}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22852135858420347, "res": {"No": 0.7714619483819523, "Yes": 0.22852135858420347}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37744985634123746, "res": {"No": 0.6225292523627266, "Yes": 0.37744985634123746}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3561615709878126, "res": {"No": 0.643820415297285, "Yes": 0.3561615709878126}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37932003905729667, "res": {"No": 0.620657567644111, "Yes": 0.37932003905729667}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35762254227063767, "res": {"No": 0.6423549156050857, "Yes": 0.35762254227063767}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33298644351351475, "res": {"No": 0.6669940190821797, "Yes": 0.33298644351351475}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42371304225856365, "res": {"No": 0.5762535408777031, "Yes": 0.42371304225856365}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3895800791757877, "res": {"No": 0.610394243352003, "Yes": 0.3895800791757877}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44732841274679214, "res": {"No": 0.5526413727726307, "Yes": 0.44732841274679214}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4155722886149473, "res": {"No": 0.5844077486726784, "Yes": 0.4155722886149473}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21306112129492374, "res": {"No": 0.7869182196538028, "Yes": 0.21306112129492374}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3675860272383869, "res": {"No": 0.6323949873941149, "Yes": 0.3675860272383869}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44447656392337537, "res": {"No": 0.5555040606656538, "Yes": 0.44447656392337537}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4219492047845646, "res": {"No": 0.5780246639652469, "Yes": 0.4219492047845646}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4994480451585126, "res": {"No": 0.5005266174112981, "Yes": 0.4994480451585126}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21723931478603822, "res": {"No": 0.7827431891462495, "Yes": 0.21723931478603822}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23998599776211477, "res": {"No": 0.7599889388138319, "Yes": 0.23998599776211477}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32695926098168576, "res": {"No": 0.6730133385462566, "Yes": 0.32695926098168576}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4550288752303917, "res": {"No": 0.5449421700221847, "Yes": 0.4550288752303917}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3002276376189263, "res": {"No": 0.6997471454362777, "Yes": 0.3002276376189263}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29236799889481935, "res": {"No": 0.7076132723629207, "Yes": 0.29236799889481935}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4861994248744156, "res": {"No": 0.5137728897018577, "Yes": 0.4861994248744156}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5958907524362381, "res": {"Yes": 0.5958907524362381, "No": 0.4040875324536965}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6344581509198318, "res": {"Yes": 0.6344581509198318, "No": 0.3655156802104235}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44695989891162413, "res": {"No": 0.5530208692090436, "Yes": 0.44695989891162413}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4303598327003491, "res": {"No": 0.5696232509110727, "Yes": 0.4303598327003491}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3800214045822625, "res": {"No": 0.6199624917192792, "Yes": 0.3800214045822625}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48340857593661685, "res": {"No": 0.5165751512699547, "Yes": 0.48340857593661685}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3843554321288085, "res": {"No": 0.6156244256564848, "Yes": 0.3843554321288085}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46621918563454445, "res": {"No": 0.5337608963181986, "Yes": 0.46621918563454445}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34833302096602187, "res": {"No": 0.6516524182811614, "Yes": 0.34833302096602187}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4952507396086241, "res": {"No": 0.5047269192133192, "Yes": 0.4952507396086241}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36518554941734016, "res": {"No": 0.6347949154817725, "Yes": 0.36518554941734016}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36449614635630445, "res": {"No": 0.6354834507485811, "Yes": 0.36449614635630445}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3143690389180488, "res": {"No": 0.6856073645480913, "Yes": 0.3143690389180488}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.004316348883226453, "res": {"No": 0.9956737403400495, "Yes": 0.004316348883226453}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35426516237019534, "res": {"No": 0.6457142051363628, "Yes": 0.35426516237019534}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2633560131530878, "res": {"No": 0.7366249306276512, "Yes": 0.2633560131530878}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41276414360890146, "res": {"No": 0.5872113927780904, "Yes": 0.41276414360890146}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2718928565585943, "res": {"No": 0.7280881448675849, "Yes": 0.2718928565585943}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22560682661450138, "res": {"No": 0.7743798604563952, "Yes": 0.22560682661450138}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2902740796755925, "res": {"No": 0.7097131675064675, "Yes": 0.2902740796755925}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.564417191323377, "res": {"Yes": 0.564417191323377, "No": 0.4355617246050971}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4469261547131158, "res": {"No": 0.5530529398191274, "Yes": 0.4469261547131158}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3189341894963065, "res": {"No": 0.6810531015692304, "Yes": 0.3189341894963065}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3814779607169141, "res": {"No": 0.6184951925107082, "Yes": 0.3814779607169141}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38428550501974434, "res": {"No": 0.6156838485937539, "Yes": 0.38428550501974434}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3590167622786116, "res": {"No": 0.6409572736854402, "Yes": 0.3590167622786116}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2987041082763858, "res": {"No": 0.7012645666181326, "Yes": 0.2987041082763858}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2847691007180662, "res": {"No": 0.7152079781883487, "Yes": 0.2847691007180662}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16578849989422992, "res": {"No": 0.8341909299756418, "Yes": 0.16578849989422992}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3257778407544772, "res": {"No": 0.6742083305894906, "Yes": 0.3257778407544772}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3565910043624989, "res": {"No": 0.6433965926766398, "Yes": 0.3565910043624989}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44824039253519427, "res": {"No": 0.5517458805645177, "Yes": 0.44824039253519427}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37719985976567927, "res": {"No": 0.6227817765297041, "Yes": 0.37719985976567927}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30299738617352284, "res": {"No": 0.6969893626216372, "Yes": 0.30299738617352284}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3064259044928327, "res": {"No": 0.6935589078558568, "Yes": 0.3064259044928327}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40085374655929834, "res": {"No": 0.5991255520953448, "Yes": 0.40085374655929834}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3693160675302762, "res": {"No": 0.6306661735097583, "Yes": 0.3693160675302762}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2106833465450075, "res": {"No": 0.7893038456903276, "Yes": 0.2106833465450075}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38869053182020064, "res": {"No": 0.6112861492893963, "Yes": 0.38869053182020064}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2618152700628719, "res": {"No": 0.738165983336499, "Yes": 0.2618152700628719}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32266346594977907, "res": {"No": 0.6773118208583674, "Yes": 0.32266346594977907}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.384123447574045, "res": {"No": 0.6158493050719535, "Yes": 0.384123447574045}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.02295783514697278, "res": {"No": 0.9770207975611066, "Yes": 0.02295783514697278}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.274425021791749, "res": {"No": 0.7255527143203024, "Yes": 0.274425021791749}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3562531518999354, "res": {"No": 0.6437292632179468, "Yes": 0.3562531518999354}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45874635277462594, "res": {"No": 0.5412326963330215, "Yes": 0.45874635277462594}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5030247526768669, "res": {"Yes": 0.5030247526768669, "No": 0.4969529129148156}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36377413943152165, "res": {"No": 0.6362055347522799, "Yes": 0.36377413943152165}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10050467853293765, "res": {"No": 0.8994850104894946, "Yes": 0.10050467853293765}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3545594308831787, "res": {"No": 0.6454202848304714, "Yes": 0.3545594308831787}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3783206358227245, "res": {"No": 0.6216632323236188, "Yes": 0.3783206358227245}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43170273975851226, "res": {"No": 0.5682770585779784, "Yes": 0.43170273975851226}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4655665350689227, "res": {"No": 0.5344093129415313, "Yes": 0.4655665350689227}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5027066398480199, "res": {"Yes": 0.5027066398480199, "No": 0.4972746698050404}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5057945600219188, "res": {"Yes": 0.5057945600219188, "No": 0.49418526585798056}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48204437733976774, "res": {"No": 0.5179340300235001, "Yes": 0.48204437733976774}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3633445577552048, "res": {"No": 0.6366419559543167, "Yes": 0.3633445577552048}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4999915653511167, "res": {"Yes": 0.4999915653511167, "No": 0.4999829905692999}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26054218299895715, "res": {"No": 0.7394263249420342, "Yes": 0.26054218299895715}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2656449330628527, "res": {"No": 0.7343371304691277, "Yes": 0.2656449330628527}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3430206601351481, "res": {"No": 0.6569582887210031, "Yes": 0.3430206601351481}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3744810792492909, "res": {"No": 0.6254964028068632, "Yes": 0.3744810792492909}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40492834234964753, "res": {"No": 0.5950488952036378, "Yes": 0.40492834234964753}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4411319023310095, "res": {"No": 0.5588450978734165, "Yes": 0.4411319023310095}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4041613619487503, "res": {"No": 0.5958154961867377, "Yes": 0.4041613619487503}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4602636783126851, "res": {"No": 0.5397072748582944, "Yes": 0.4602636783126851}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4322325177510414, "res": {"No": 0.5677461949502461, "Yes": 0.4322325177510414}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4150591660990695, "res": {"No": 0.5849187677143374, "Yes": 0.4150591660990695}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.039700249764958666, "res": {"No": 0.9602854547708392, "Yes": 0.039700249764958666}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43244286732782, "res": {"No": 0.5675438295055302, "Yes": 0.43244286732782}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4723734965803771, "res": {"No": 0.5276103717360687, "Yes": 0.4723734965803771}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4605414115354629, "res": {"No": 0.5394379310119543, "Yes": 0.4605414115354629}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41538993395813095, "res": {"No": 0.5845915971982878, "Yes": 0.41538993395813095}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2955280862858525, "res": {"No": 0.7044559191539124, "Yes": 0.2955280862858525}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4811714151060852, "res": {"No": 0.5188027841683439, "Yes": 0.4811714151060852}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43287048784686993, "res": {"No": 0.5671101080590205, "Yes": 0.43287048784686993}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5140454774046072, "res": {"Yes": 0.5140454774046072, "No": 0.48592973195000255}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5056374846266476, "res": {"Yes": 0.5056374846266476, "No": 0.49433344481160013}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.359007858773311, "res": {"No": 0.6409792204382174, "Yes": 0.359007858773311}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.376363291976444, "res": {"No": 0.6236185098442557, "Yes": 0.376363291976444}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.371420201018958, "res": {"No": 0.6285664994160169, "Yes": 0.371420201018958}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40085233555659383, "res": {"No": 0.5991314415284684, "Yes": 0.40085233555659383}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3317517601938074, "res": {"No": 0.6682347266278924, "Yes": 0.3317517601938074}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2628944127104255, "res": {"No": 0.7370932266621404, "Yes": 0.2628944127104255}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34484046127222845, "res": {"No": 0.6551403051171831, "Yes": 0.34484046127222845}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3169213319811437, "res": {"No": 0.6830625925858801, "Yes": 0.3169213319811437}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41269838313684737, "res": {"No": 0.5872869717477434, "Yes": 0.41269838313684737}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.376020919564614, "res": {"No": 0.623963347698808, "Yes": 0.376020919564614}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43223325254694617, "res": {"No": 0.5677395921003945, "Yes": 0.43223325254694617}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47574865496429786, "res": {"No": 0.5242306393856602, "Yes": 0.47574865496429786}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5686546335790628, "res": {"Yes": 0.5686546335790628, "No": 0.43132681705278375}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43629445852456933, "res": {"No": 0.5636815976907739, "Yes": 0.43629445852456933}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40415759114083416, "res": {"No": 0.595828085901182, "Yes": 0.40415759114083416}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4762947469997733, "res": {"No": 0.523684455608457, "Yes": 0.4762947469997733}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3975644578184925, "res": {"No": 0.6024177725660297, "Yes": 0.3975644578184925}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48893344649847026, "res": {"No": 0.5110499469566165, "Yes": 0.48893344649847026}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5168794778130573, "res": {"Yes": 0.5168794778130573, "No": 0.48310143839460135}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4542137848660899, "res": {"No": 0.5457712211111232, "Yes": 0.4542137848660899}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4014686222326926, "res": {"No": 0.5985048981345324, "Yes": 0.4014686222326926}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4084997404727119, "res": {"No": 0.5914741493908232, "Yes": 0.4084997404727119}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47232668668633454, "res": {"No": 0.5276446147603685, "Yes": 0.47232668668633454}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3890050474492219, "res": {"No": 0.610967996542422, "Yes": 0.3890050474492219}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3781659872645211, "res": {"No": 0.6218133945603513, "Yes": 0.3781659872645211}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5199591126590252, "res": {"Yes": 0.5199591126590252, "No": 0.4800038472539142}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3297291233259945, "res": {"No": 0.6702460735972331, "Yes": 0.3297291233259945}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3366196570800341, "res": {"No": 0.6633546730646138, "Yes": 0.3366196570800341}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4372426171325576, "res": {"No": 0.5627246200944839, "Yes": 0.4372426171325576}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3269226435950678, "res": {"No": 0.6730482890364089, "Yes": 0.3269226435950678}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21305213030531628, "res": {"No": 0.7869225083697867, "Yes": 0.21305213030531628}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3838230461196966, "res": {"No": 0.6161510219460082, "Yes": 0.3838230461196966}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3458177073958093, "res": {"No": 0.6541497030324859, "Yes": 0.3458177073958093}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24146843943475155, "res": {"No": 0.7585128568307703, "Yes": 0.24146843943475155}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27201858168530524, "res": {"No": 0.7279594739398458, "Yes": 0.27201858168530524}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33367263546117276, "res": {"No": 0.6663090613509265, "Yes": 0.33367263546117276}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3558922366479192, "res": {"No": 0.6440922614614947, "Yes": 0.3558922366479192}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4159792083074855, "res": {"No": 0.5840022753583161, "Yes": 0.4159792083074855}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4679198305415705, "res": {"No": 0.5320644023066015, "Yes": 0.4679198305415705}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3337025338687388, "res": {"No": 0.6662797510600023, "Yes": 0.3337025338687388}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4913570824552918, "res": {"No": 0.5086241722950099, "Yes": 0.4913570824552918}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42530407299960316, "res": {"No": 0.5746795487842092, "Yes": 0.42530407299960316}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48245906556949386, "res": {"No": 0.5175210905433096, "Yes": 0.48245906556949386}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45588124090458826, "res": {"No": 0.5440959331344479, "Yes": 0.45588124090458826}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41668074913543207, "res": {"No": 0.5832981364892873, "Yes": 0.41668074913543207}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23410250593341453, "res": {"No": 0.7658844110810491, "Yes": 0.23410250593341453}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38183406483379934, "res": {"No": 0.6181498655704403, "Yes": 0.38183406483379934}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.520706487344489, "res": {"Yes": 0.520706487344489, "No": 0.4792765977023119}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.479603048265169, "res": {"No": 0.5203768283236007, "Yes": 0.479603048265169}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4321690706745649, "res": {"No": 0.5678144421444533, "Yes": 0.4321690706745649}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1411850553248585, "res": {"No": 0.8588058510050179, "Yes": 0.1411850553248585}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29148696242691513, "res": {"No": 0.708502883495586, "Yes": 0.29148696242691513}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33028188241406564, "res": {"No": 0.6697045325892274, "Yes": 0.33028188241406564}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4636521428728029, "res": {"No": 0.5363287319659058, "Yes": 0.4636521428728029}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3079352639747611, "res": {"No": 0.6920512814282512, "Yes": 0.3079352639747611}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11546779720194293, "res": {"No": 0.8845177126279923, "Yes": 0.11546779720194293}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35800819563796366, "res": {"No": 0.6419777328037447, "Yes": 0.35800819563796366}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37280659719080966, "res": {"No": 0.6271777284895265, "Yes": 0.37280659719080966}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2919391175483708, "res": {"No": 0.7080484396811879, "Yes": 0.2919391175483708}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3129507966501099, "res": {"No": 0.6870372960163049, "Yes": 0.3129507966501099}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19845226229389004, "res": {"No": 0.801531056268086, "Yes": 0.19845226229389004}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.15751003633259852, "res": {"No": 0.8424768288352527, "Yes": 0.15751003633259852}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28212165510194076, "res": {"No": 0.7178578859187593, "Yes": 0.28212165510194076}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3569323182178222, "res": {"No": 0.6430353625930944, "Yes": 0.3569323182178222}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38462258277021677, "res": {"No": 0.6153547181303968, "Yes": 0.38462258277021677}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18284426488755978, "res": {"No": 0.8171427048794185, "Yes": 0.18284426488755978}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4122770764560141, "res": {"No": 0.5877043874105248, "Yes": 0.4122770764560141}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.20731819335103396, "res": {"No": 0.7926714705579093, "Yes": 0.20731819335103396}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33057599719396674, "res": {"No": 0.6694078254689079, "Yes": 0.33057599719396674}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3759482041505774, "res": {"No": 0.6240330794991051, "Yes": 0.3759482041505774}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36135316589269323, "res": {"No": 0.6386275361964077, "Yes": 0.36135316589269323}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4129362484676315, "res": {"No": 0.5870406917537645, "Yes": 0.4129362484676315}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45472783674538636, "res": {"No": 0.5452478865607883, "Yes": 0.45472783674538636}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.426489214825865, "res": {"No": 0.5734837945026553, "Yes": 0.426489214825865}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4210770985999483, "res": {"No": 0.5788998830026882, "Yes": 0.4210770985999483}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3799355294490825, "res": {"No": 0.620047041769189, "Yes": 0.3799355294490825}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4611150504433261, "res": {"No": 0.5388585893047462, "Yes": 0.4611150504433261}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3654128020272279, "res": {"No": 0.6345664114049812, "Yes": 0.3654128020272279}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5951622985258331, "res": {"Yes": 0.5951622985258331, "No": 0.4048155288342441}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4258948901050138, "res": {"No": 0.5740844890658259, "Yes": 0.4258948901050138}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43380554425509504, "res": {"No": 0.5661712413399016, "Yes": 0.43380554425509504}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5112087037237909, "res": {"Yes": 0.5112087037237909, "No": 0.4887645345084622}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37871079843392363, "res": {"No": 0.6212642277640877, "Yes": 0.37871079843392363}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4236572810545185, "res": {"No": 0.5763139930448019, "Yes": 0.4236572810545185}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40120590686869884, "res": {"No": 0.598770735225417, "Yes": 0.40120590686869884}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3359052005662891, "res": {"No": 0.6640826927405231, "Yes": 0.3359052005662891}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.641682985053194, "res": {"Yes": 0.641682985053194, "No": 0.3582891274336679}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48055851912791914, "res": {"No": 0.5194243499147168, "Yes": 0.48055851912791914}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43256330213601624, "res": {"No": 0.567415011677609, "Yes": 0.43256330213601624}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4315216050807714, "res": {"No": 0.5684548093387158, "Yes": 0.4315216050807714}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4117841519301262, "res": {"No": 0.58818632619251, "Yes": 0.4117841519301262}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23140626274584838, "res": {"No": 0.7685632624523998, "Yes": 0.23140626274584838}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2781890111759289, "res": {"No": 0.7217793472685474, "Yes": 0.2781890111759289}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28041113878084123, "res": {"No": 0.7195639467165822, "Yes": 0.28041113878084123}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27639908431281196, "res": {"No": 0.723569483147099, "Yes": 0.27639908431281196}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3489620157833185, "res": {"No": 0.6510124451385477, "Yes": 0.3489620157833185}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32917508385396116, "res": {"No": 0.6708066201800597, "Yes": 0.32917508385396116}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5451270675644909, "res": {"Yes": 0.5451270675644909, "No": 0.4548413054952923}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4551975219753362, "res": {"No": 0.5447689277533199, "Yes": 0.4551975219753362}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4006709107728855, "res": {"No": 0.5992979153493616, "Yes": 0.4006709107728855}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.00492665214351615, "res": {"No": 0.9950661910179626, "Yes": 0.00492665214351615}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33763417233885806, "res": {"No": 0.6623391785159856, "Yes": 0.33763417233885806}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3003530388623532, "res": {"No": 0.6996242806253853, "Yes": 0.3003530388623532}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3645587721736521, "res": {"No": 0.6354083601060242, "Yes": 0.3645587721736521}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4060072631534106, "res": {"No": 0.5939751473339044, "Yes": 0.4060072631534106}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43250731476576854, "res": {"No": 0.567469259145713, "Yes": 0.43250731476576854}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2730380544286532, "res": {"No": 0.7269469465290513, "Yes": 0.2730380544286532}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41860634977910116, "res": {"No": 0.5813769426316907, "Yes": 0.41860634977910116}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3134777168699862, "res": {"No": 0.6865028172467081, "Yes": 0.3134777168699862}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30863911415037226, "res": {"No": 0.6913467998502376, "Yes": 0.30863911415037226}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5030028011556458, "res": {"Yes": 0.5030028011556458, "No": 0.4969757732745939}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21471455159181707, "res": {"No": 0.7852643235330293, "Yes": 0.21471455159181707}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41988975090859043, "res": {"No": 0.5800842368726903, "Yes": 0.41988975090859043}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5349713954308, "res": {"Yes": 0.5349713954308, "No": 0.46500224753400465}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42717914944765206, "res": {"No": 0.5727955913676556, "Yes": 0.42717914944765206}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22946269768785002, "res": {"No": 0.7705240586386295, "Yes": 0.22946269768785002}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32701972133842905, "res": {"No": 0.6729632211090809, "Yes": 0.32701972133842905}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3351272394555003, "res": {"No": 0.6648549516081028, "Yes": 0.3351272394555003}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35809172869375805, "res": {"No": 0.6418923682808494, "Yes": 0.35809172869375805}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3160329179793185, "res": {"No": 0.6839467330841617, "Yes": 0.3160329179793185}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4006736593847612, "res": {"No": 0.5992917366197058, "Yes": 0.4006736593847612}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3687256011094696, "res": {"No": 0.6312475371151128, "Yes": 0.3687256011094696}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3433821541438668, "res": {"No": 0.6565878030556418, "Yes": 0.3433821541438668}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4685287889462573, "res": {"No": 0.5314476132647626, "Yes": 0.4685287889462573}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.53637623134165, "res": {"Yes": 0.53637623134165, "No": 0.463602117505504}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2889108257220506, "res": {"No": 0.7110693181850267, "Yes": 0.2889108257220506}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36848657481404584, "res": {"No": 0.6314896480263391, "Yes": 0.36848657481404584}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2566284397329474, "res": {"No": 0.7433526849276454, "Yes": 0.2566284397329474}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46356011705630706, "res": {"No": 0.5364115260588558, "Yes": 0.46356011705630706}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36271172725743867, "res": {"No": 0.6372664640290168, "Yes": 0.36271172725743867}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18485767025099892, "res": {"No": 0.8151248233369759, "Yes": 0.18485767025099892}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2507070883463517, "res": {"No": 0.7492786764331943, "Yes": 0.2507070883463517}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3976453704182363, "res": {"No": 0.602330091016673, "Yes": 0.3976453704182363}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3667297420079912, "res": {"No": 0.6332498670810621, "Yes": 0.3667297420079912}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3642834156082089, "res": {"No": 0.6356979308251519, "Yes": 0.3642834156082089}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.008323190700576584, "res": {"No": 0.9916656245946431, "Yes": 0.008323190700576584}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2545938114770713, "res": {"No": 0.7453769838471731, "Yes": 0.2545938114770713}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2860984490349533, "res": {"No": 0.7138869946264534, "Yes": 0.2860984490349533}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3339519695796219, "res": {"No": 0.6660329601177698, "Yes": 0.3339519695796219}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22044970617962786, "res": {"No": 0.7795299415475474, "Yes": 0.22044970617962786}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48547845459532535, "res": {"No": 0.514499497698028, "Yes": 0.48547845459532535}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.370107564769579, "res": {"No": 0.6298648540434844, "Yes": 0.370107564769579}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4011019478290082, "res": {"No": 0.5988736727620301, "Yes": 0.4011019478290082}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4104784698764345, "res": {"No": 0.5894924656514257, "Yes": 0.4104784698764345}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.16909459278887604, "res": {"No": 0.8308931021554091, "Yes": 0.16909459278887604}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.615495416608686, "res": {"Yes": 0.615495416608686, "No": 0.3844691720399194}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5332125253764017, "res": {"Yes": 0.5332125253764017, "No": 0.4667513519714804}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5059942011187323, "res": {"Yes": 0.5059942011187323, "No": 0.49398163430774583}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4765555135240608, "res": {"No": 0.5234187845800572, "Yes": 0.4765555135240608}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37535042225514675, "res": {"No": 0.6246316768507962, "Yes": 0.37535042225514675}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0824088042566211, "res": {"No": 0.9175730809168795, "Yes": 0.0824088042566211}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5003464102246195, "res": {"Yes": 0.5003464102246195, "No": 0.4996302270794359}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4540226011096658, "res": {"No": 0.5459505910032395, "Yes": 0.4540226011096658}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4740558202406222, "res": {"No": 0.5259212985329136, "Yes": 0.4740558202406222}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6346539178004396, "res": {"Yes": 0.6346539178004396, "No": 0.365330081933117}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14992332233777655, "res": {"No": 0.8500525141952423, "Yes": 0.14992332233777655}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.440237044340096, "res": {"No": 0.5597383704767678, "Yes": 0.440237044340096}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24988414213435978, "res": {"No": 0.7500998409837273, "Yes": 0.24988414213435978}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48524129320486026, "res": {"No": 0.5147270596861576, "Yes": 0.48524129320486026}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30516797996513134, "res": {"No": 0.6948078822833427, "Yes": 0.30516797996513134}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23471651785779946, "res": {"No": 0.7652701119193809, "Yes": 0.23471651785779946}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3888184864781869, "res": {"No": 0.6111667156448722, "Yes": 0.3888184864781869}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40843698747807783, "res": {"No": 0.5915423502919543, "Yes": 0.40843698747807783}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4309289938062765, "res": {"No": 0.5690574234503446, "Yes": 0.4309289938062765}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4901618187737863, "res": {"No": 0.5098179182759395, "Yes": 0.4901618187737863}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33280032282783234, "res": {"No": 0.6671833855508347, "Yes": 0.33280032282783234}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29911807017769326, "res": {"No": 0.700868849525268, "Yes": 0.29911807017769326}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3323650054408964, "res": {"No": 0.6676133903173052, "Yes": 0.3323650054408964}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3122249679135714, "res": {"No": 0.6877578810944488, "Yes": 0.3122249679135714}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43960290860221635, "res": {"No": 0.5603667493830878, "Yes": 0.43960290860221635}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24568513546575516, "res": {"No": 0.7542963658837952, "Yes": 0.24568513546575516}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3744096725163265, "res": {"No": 0.6255644292395387, "Yes": 0.3744096725163265}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4663269413390367, "res": {"No": 0.5336470359144991, "Yes": 0.4663269413390367}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4457373062187301, "res": {"No": 0.5542309288548245, "Yes": 0.4457373062187301}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4852232716778924, "res": {"No": 0.5147481845181656, "Yes": 0.4852232716778924}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3853975924144412, "res": {"No": 0.614578691748412, "Yes": 0.3853975924144412}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5089880398782091, "res": {"Yes": 0.5089880398782091, "No": 0.49099165365405883}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19814761415871265, "res": {"No": 0.8018346856350007, "Yes": 0.19814761415871265}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44945416460624293, "res": {"No": 0.5505231120272317, "Yes": 0.44945416460624293}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4058717321345912, "res": {"No": 0.5941088246014427, "Yes": 0.4058717321345912}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4140282378978988, "res": {"No": 0.5859562798224175, "Yes": 0.4140282378978988}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41041300378154905, "res": {"No": 0.5895682203003939, "Yes": 0.41041300378154905}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31308307760577153, "res": {"No": 0.6869004518170733, "Yes": 0.31308307760577153}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3007033645474104, "res": {"No": 0.6992830032111911, "Yes": 0.3007033645474104}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3175392329299548, "res": {"No": 0.6824417046522364, "Yes": 0.3175392329299548}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28101017509541515, "res": {"No": 0.7189710761185135, "Yes": 0.28101017509541515}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31885679356089236, "res": {"No": 0.6811255354181839, "Yes": 0.31885679356089236}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43312903199583747, "res": {"No": 0.5668560429471388, "Yes": 0.43312903199583747}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4142336882644696, "res": {"No": 0.5857440848886881, "Yes": 0.4142336882644696}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21551301000666054, "res": {"No": 0.7844785417151772, "Yes": 0.21551301000666054}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43568568568579685, "res": {"No": 0.5642957700270281, "Yes": 0.43568568568579685}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4269940074188006, "res": {"No": 0.5729788579892673, "Yes": 0.4269940074188006}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41819016906476925, "res": {"No": 0.5817845667501591, "Yes": 0.41819016906476925}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.413626990674522, "res": {"No": 0.5863416141262807, "Yes": 0.413626990674522}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43974683177293844, "res": {"No": 0.5602295603805504, "Yes": 0.43974683177293844}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.225448213116318, "res": {"No": 0.7745339231653751, "Yes": 0.225448213116318}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30515299658516004, "res": {"No": 0.6948267813147665, "Yes": 0.30515299658516004}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2956012976596798, "res": {"No": 0.7043710866358613, "Yes": 0.2956012976596798}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3066167753250264, "res": {"No": 0.6933611815946439, "Yes": 0.3066167753250264}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25743636902441336, "res": {"No": 0.7425421813647868, "Yes": 0.25743636902441336}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33938622952553676, "res": {"No": 0.6605950494989603, "Yes": 0.33938622952553676}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35039299904987325, "res": {"No": 0.6495904250540477, "Yes": 0.35039299904987325}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5605055133685075, "res": {"Yes": 0.5605055133685075, "No": 0.4394736579755638}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4605100727587219, "res": {"No": 0.5394691168201631, "Yes": 0.4605100727587219}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46954055731838035, "res": {"No": 0.5304429854163488, "Yes": 0.46954055731838035}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2904582298847818, "res": {"No": 0.7095243805289413, "Yes": 0.2904582298847818}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3110469853245748, "res": {"No": 0.6889332032883725, "Yes": 0.3110469853245748}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.25461736249385064, "res": {"No": 0.7453690978004013, "Yes": 0.25461736249385064}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3191002060422789, "res": {"No": 0.680882845957317, "Yes": 0.3191002060422789}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24266861272853388, "res": {"No": 0.7573092109916811, "Yes": 0.24266861272853388}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24001609389322212, "res": {"No": 0.7599683053942431, "Yes": 0.24001609389322212}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4214471478630632, "res": {"No": 0.5785263121369171, "Yes": 0.4214471478630632}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33927710046301524, "res": {"No": 0.6607001849605444, "Yes": 0.33927710046301524}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4280383434127866, "res": {"No": 0.5719401648383424, "Yes": 0.4280383434127866}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43362652796373236, "res": {"No": 0.5663490187062621, "Yes": 0.43362652796373236}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3092602572842796, "res": {"No": 0.6907248192911075, "Yes": 0.3092602572842796}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4119161251873967, "res": {"No": 0.588061820326666, "Yes": 0.4119161251873967}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2758019895243815, "res": {"No": 0.7241744663535016, "Yes": 0.2758019895243815}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32701030330607483, "res": {"No": 0.6729669022279683, "Yes": 0.32701030330607483}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2753913781011252, "res": {"No": 0.7245832985401134, "Yes": 0.2753913781011252}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33764524692133296, "res": {"No": 0.6623318265919073, "Yes": 0.33764524692133296}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.416210919159639, "res": {"No": 0.5837683709009983, "Yes": 0.416210919159639}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4559023806078492, "res": {"No": 0.5440775756473566, "Yes": 0.4559023806078492}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3894858277828237, "res": {"No": 0.6104922500150476, "Yes": 0.3894858277828237}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3469674178950109, "res": {"No": 0.6530155413927655, "Yes": 0.3469674178950109}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.063566307836635, "res": {"No": 0.936422697080412, "Yes": 0.063566307836635}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27743910814002404, "res": {"No": 0.7225435528869337, "Yes": 0.27743910814002404}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33592750541211935, "res": {"No": 0.6640495159979557, "Yes": 0.33592750541211935}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41154548102616206, "res": {"No": 0.5884365938838362, "Yes": 0.41154548102616206}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42915031363061984, "res": {"No": 0.5708320760822165, "Yes": 0.42915031363061984}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21333934756136694, "res": {"No": 0.7866468819581957, "Yes": 0.21333934756136694}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39536461258995964, "res": {"No": 0.6046093693391932, "Yes": 0.39536461258995964}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45240858425389535, "res": {"No": 0.5475658393505084, "Yes": 0.45240858425389535}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39176970463754346, "res": {"No": 0.6081995582881428, "Yes": 0.39176970463754346}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2534621948028331, "res": {"No": 0.7465172533095373, "Yes": 0.2534621948028331}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12207919567197616, "res": {"No": 0.877906274417594, "Yes": 0.12207919567197616}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41752819668570373, "res": {"No": 0.5824523496874571, "Yes": 0.41752819668570373}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4370934116906873, "res": {"No": 0.562881417014439, "Yes": 0.4370934116906873}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48558081395244385, "res": {"No": 0.5143960216759934, "Yes": 0.48558081395244385}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3569338887234774, "res": {"No": 0.6430504998236942, "Yes": 0.3569338887234774}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06040421449912823, "res": {"No": 0.9395846904677831, "Yes": 0.06040421449912823}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4064194031037151, "res": {"No": 0.5935571953431563, "Yes": 0.4064194031037151}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36810686093155404, "res": {"No": 0.6318696601779895, "Yes": 0.36810686093155404}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4048053680919882, "res": {"No": 0.595174612561178, "Yes": 0.4048053680919882}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37542483534456006, "res": {"No": 0.6245577560661897, "Yes": 0.37542483534456006}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14958650869926346, "res": {"No": 0.8504041856403216, "Yes": 0.14958650869926346}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17156430952697393, "res": {"No": 0.8284215465174559, "Yes": 0.17156430952697393}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2745752007988927, "res": {"No": 0.7254058845653895, "Yes": 0.2745752007988927}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1842567709273213, "res": {"No": 0.8157282797288964, "Yes": 0.1842567709273213}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26653124505924475, "res": {"No": 0.7334444845122117, "Yes": 0.26653124505924475}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41353482416968446, "res": {"No": 0.5864458162891989, "Yes": 0.41353482416968446}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2666240673982057, "res": {"No": 0.7333585078549125, "Yes": 0.2666240673982057}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23042432004042585, "res": {"No": 0.7695585270892461, "Yes": 0.23042432004042585}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32790580228496613, "res": {"No": 0.6720713288297386, "Yes": 0.32790580228496613}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22999895811591645, "res": {"No": 0.7699845725380456, "Yes": 0.22999895811591645}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2503196945150429, "res": {"No": 0.7496577949321301, "Yes": 0.2503196945150429}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28638242815078874, "res": {"No": 0.7135944608558766, "Yes": 0.28638242815078874}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3966556123334208, "res": {"No": 0.6033159234300564, "Yes": 0.3966556123334208}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37100540787442793, "res": {"No": 0.6289708479607576, "Yes": 0.37100540787442793}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39996523026091807, "res": {"No": 0.6000090743282133, "Yes": 0.39996523026091807}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10589033456419522, "res": {"No": 0.894101478163879, "Yes": 0.10589033456419522}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35671333605568123, "res": {"No": 0.6432712776598047, "Yes": 0.35671333605568123}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32868461664512866, "res": {"No": 0.6712920706763581, "Yes": 0.32868461664512866}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43159570370154837, "res": {"No": 0.5683746117073412, "Yes": 0.43159570370154837}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3300286203331561, "res": {"No": 0.6699546603542353, "Yes": 0.3300286203331561}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19875314533261762, "res": {"No": 0.8012288238410762, "Yes": 0.19875314533261762}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33943235524830123, "res": {"No": 0.6605463059886717, "Yes": 0.33943235524830123}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28718527902456564, "res": {"No": 0.7127846769787446, "Yes": 0.28718527902456564}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2657874898068681, "res": {"No": 0.7341861662747543, "Yes": 0.2657874898068681}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33349200064036216, "res": {"No": 0.6664705675855704, "Yes": 0.33349200064036216}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0926366895833353, "res": {"No": 0.9073535838228154, "Yes": 0.0926366895833353}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40838504984890434, "res": {"No": 0.591596893011032, "Yes": 0.40838504984890434}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45102588339794614, "res": {"No": 0.5489494874463904, "Yes": 0.45102588339794614}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36479464506056253, "res": {"No": 0.635183096948963, "Yes": 0.36479464506056253}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2973307272389717, "res": {"No": 0.7026487545085758, "Yes": 0.2973307272389717}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36416722773158156, "res": {"No": 0.6358127991041812, "Yes": 0.36416722773158156}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3959009758432563, "res": {"No": 0.6040742851203199, "Yes": 0.3959009758432563}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45206441420513316, "res": {"No": 0.5479085091929281, "Yes": 0.45206441420513316}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37539879302702045, "res": {"No": 0.6245787353135605, "Yes": 0.37539879302702045}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39293499025288353, "res": {"No": 0.607037287894289, "Yes": 0.39293499025288353}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0657032623153883, "res": {"No": 0.9342812143899492, "Yes": 0.0657032623153883}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27840393953316245, "res": {"No": 0.7215754589699743, "Yes": 0.27840393953316245}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4389315063769671, "res": {"No": 0.5610444180690891, "Yes": 0.4389315063769671}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3785481127182491, "res": {"No": 0.6214298353874332, "Yes": 0.3785481127182491}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34928771434631606, "res": {"No": 0.6506948220631178, "Yes": 0.34928771434631606}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18702669842356848, "res": {"No": 0.8129495138077342, "Yes": 0.18702669842356848}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3510546042330959, "res": {"No": 0.6489205427828321, "Yes": 0.3510546042330959}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38135331408772033, "res": {"No": 0.6186139302439455, "Yes": 0.38135331408772033}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3660661588613429, "res": {"No": 0.6339125803090012, "Yes": 0.3660661588613429}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18174941159777885, "res": {"No": 0.8182206953926597, "Yes": 0.18174941159777885}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09501276765913858, "res": {"No": 0.9049634254194815, "Yes": 0.09501276765913858}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2923108173067771, "res": {"No": 0.7076571386692966, "Yes": 0.2923108173067771}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37937034022936966, "res": {"No": 0.6206025425834606, "Yes": 0.37937034022936966}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4016410097125421, "res": {"No": 0.5983372205548011, "Yes": 0.4016410097125421}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2661106178675295, "res": {"No": 0.7338586087897537, "Yes": 0.2661106178675295}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3783288303364427, "res": {"No": 0.6216435010457618, "Yes": 0.3783288303364427}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34185496869675386, "res": {"No": 0.6581262807742922, "Yes": 0.34185496869675386}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41249109554416274, "res": {"No": 0.5874859371595581, "Yes": 0.41249109554416274}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3922162512299256, "res": {"No": 0.6077649908162627, "Yes": 0.3922162512299256}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38415675252069825, "res": {"No": 0.6158192400427732, "Yes": 0.38415675252069825}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32822218717696305, "res": {"No": 0.6717568058605575, "Yes": 0.32822218717696305}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40201134474604827, "res": {"No": 0.597964375230023, "Yes": 0.40201134474604827}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4010070783843752, "res": {"No": 0.5989673020016317, "Yes": 0.4010070783843752}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49573971407860007, "res": {"No": 0.5042384538902273, "Yes": 0.49573971407860007}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24488217560806314, "res": {"No": 0.7550948413048706, "Yes": 0.24488217560806314}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08136446086668288, "res": {"No": 0.9186279507505097, "Yes": 0.08136446086668288}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2774430755476373, "res": {"No": 0.722540070235402, "Yes": 0.2774430755476373}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2732289474246805, "res": {"No": 0.7267479429683967, "Yes": 0.2732289474246805}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2741816873018881, "res": {"No": 0.7257921209054257, "Yes": 0.2741816873018881}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23547936298556418, "res": {"No": 0.7644944142875596, "Yes": 0.23547936298556418}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4738258641485008, "res": {"No": 0.5261356340888312, "Yes": 0.4738258641485008}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2735449280121196, "res": {"No": 0.7264318109838103, "Yes": 0.2735449280121196}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40555243674061897, "res": {"No": 0.5944209741179522, "Yes": 0.40555243674061897}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37021145003464956, "res": {"No": 0.6297641215145237, "Yes": 0.37021145003464956}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38096430344126225, "res": {"No": 0.6190080509505342, "Yes": 0.38096430344126225}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.443131284308488, "res": {"No": 0.5568489281444267, "Yes": 0.443131284308488}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35457230162412234, "res": {"No": 0.6454092611461492, "Yes": 0.35457230162412234}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40053020383458476, "res": {"No": 0.5994463498572422, "Yes": 0.40053020383458476}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4071829239153556, "res": {"No": 0.5927969204121533, "Yes": 0.4071829239153556}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3039903869350733, "res": {"No": 0.6959882297983366, "Yes": 0.3039903869350733}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32720044965606887, "res": {"No": 0.6727793052078669, "Yes": 0.32720044965606887}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24385034061627642, "res": {"No": 0.7561342040292119, "Yes": 0.24385034061627642}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3030995438101041, "res": {"No": 0.6968786964673184, "Yes": 0.3030995438101041}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34005218087022776, "res": {"No": 0.6599210053424676, "Yes": 0.34005218087022776}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28489738960951927, "res": {"No": 0.7150812331102212, "Yes": 0.28489738960951927}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2969025822135974, "res": {"No": 0.7030837795133296, "Yes": 0.2969025822135974}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39896649422170943, "res": {"No": 0.6010103640019652, "Yes": 0.39896649422170943}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46065095973173764, "res": {"No": 0.5393203193941697, "Yes": 0.46065095973173764}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5051891325100191, "res": {"Yes": 0.5051891325100191, "No": 0.4947804748171397}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5059587675855476, "res": {"Yes": 0.5059587675855476, "No": 0.4940148754503041}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2911810035061244, "res": {"No": 0.7088021403604009, "Yes": 0.2911810035061244}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3708303865586947, "res": {"No": 0.629150388150538, "Yes": 0.3708303865586947}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5028551361822157, "res": {"Yes": 0.5028551361822157, "No": 0.4971313212103986}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5387461197016755, "res": {"Yes": 0.5387461197016755, "No": 0.46123869196175493}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.452308124510967, "res": {"No": 0.5476750567723078, "Yes": 0.452308124510967}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.017718499778538094, "res": {"No": 0.982268243050329, "Yes": 0.017718499778538094}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39112057131399575, "res": {"No": 0.6088478672199156, "Yes": 0.39112057131399575}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2557131936127239, "res": {"No": 0.744263269415733, "Yes": 0.2557131936127239}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24250515769146355, "res": {"No": 0.7574720272524469, "Yes": 0.24250515769146355}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31890816552823587, "res": {"No": 0.6810625001668822, "Yes": 0.31890816552823587}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09759015124790658, "res": {"No": 0.9023999262182834, "Yes": 0.09759015124790658}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25536308086566106, "res": {"No": 0.744614979673582, "Yes": 0.25536308086566106}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26827183576035984, "res": {"No": 0.7317128817593201, "Yes": 0.26827183576035984}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28343838961893425, "res": {"No": 0.7165436445722363, "Yes": 0.28343838961893425}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23015232653341086, "res": {"No": 0.7698252561146925, "Yes": 0.23015232653341086}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4439904614506219, "res": {"No": 0.555975027671568, "Yes": 0.4439904614506219}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20177378204371088, "res": {"No": 0.7982058638920805, "Yes": 0.20177378204371088}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1606901664634329, "res": {"No": 0.8392914877730591, "Yes": 0.1606901664634329}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.19680206530166905, "res": {"No": 0.8031748843645652, "Yes": 0.19680206530166905}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.17600360521106576, "res": {"No": 0.8239755350390717, "Yes": 0.17600360521106576}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48792861722825553, "res": {"No": 0.5120299292933603, "Yes": 0.48792861722825553}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3626635986046383, "res": {"No": 0.6372973849479868, "Yes": 0.3626635986046383}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4286603493893991, "res": {"No": 0.5712874391087949, "Yes": 0.4286603493893991}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38107591566115495, "res": {"No": 0.618882417447328, "Yes": 0.38107591566115495}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2649070418519149, "res": {"No": 0.735061051465861, "Yes": 0.2649070418519149}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21240573267144958, "res": {"No": 0.7875829976457627, "Yes": 0.21240573267144958}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.18989505528940154, "res": {"No": 0.8100917954667719, "Yes": 0.18989505528940154}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3253791373865875, "res": {"No": 0.6746070538936835, "Yes": 0.3253791373865875}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3767597170637149, "res": {"No": 0.6232177205859691, "Yes": 0.3767597170637149}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2897123340102728, "res": {"No": 0.7102730538446699, "Yes": 0.2897123340102728}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2514558629681201, "res": {"No": 0.7485295701682395, "Yes": 0.2514558629681201}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.18933387273134372, "res": {"No": 0.8106517866632675, "Yes": 0.18933387273134372}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34106534243889075, "res": {"No": 0.6589157485981942, "Yes": 0.34106534243889075}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35908293516554374, "res": {"No": 0.6408944116788403, "Yes": 0.35908293516554374}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23098772694038552, "res": {"No": 0.7689956239998741, "Yes": 0.23098772694038552}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16263816374980108, "res": {"No": 0.8373495861831137, "Yes": 0.16263816374980108}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3918702221303987, "res": {"No": 0.6081108466889131, "Yes": 0.3918702221303987}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4780287030842967, "res": {"No": 0.5219565145653251, "Yes": 0.4780287030842967}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41649584225754, "res": {"No": 0.5834942976314635, "Yes": 0.41649584225754}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.366602802145866, "res": {"No": 0.633382103458306, "Yes": 0.366602802145866}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24658946750784905, "res": {"No": 0.7533953166723659, "Yes": 0.24658946750784905}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2972615464282427, "res": {"No": 0.7027198521063237, "Yes": 0.2972615464282427}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3730022041897066, "res": {"No": 0.62698281313052, "Yes": 0.3730022041897066}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33158396942457913, "res": {"No": 0.6684014185206898, "Yes": 0.33158396942457913}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2989136330479092, "res": {"No": 0.7010727969848554, "Yes": 0.2989136330479092}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03237332168991188, "res": {"No": 0.9676014903434337, "Yes": 0.03237332168991188}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.16012143225703085, "res": {"No": 0.839861719784659, "Yes": 0.16012143225703085}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2287305284453348, "res": {"No": 0.7712510826526184, "Yes": 0.2287305284453348}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4319185716740655, "res": {"No": 0.5680538831838859, "Yes": 0.4319185716740655}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3827378457540561, "res": {"No": 0.617238538241083, "Yes": 0.3827378457540561}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1626550302018924, "res": {"No": 0.8373373944618948, "Yes": 0.1626550302018924}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3683111801117988, "res": {"No": 0.6316720043570185, "Yes": 0.3683111801117988}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42038698460879437, "res": {"No": 0.5795926382779918, "Yes": 0.42038698460879437}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41955925985013004, "res": {"No": 0.5804153564101582, "Yes": 0.41955925985013004}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3659244088896698, "res": {"No": 0.6340552393677911, "Yes": 0.3659244088896698}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20879042109662632, "res": {"No": 0.791197206019513, "Yes": 0.20879042109662632}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20778026000353625, "res": {"No": 0.7922033064867612, "Yes": 0.20778026000353625}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35411551695391413, "res": {"No": 0.6458588936121448, "Yes": 0.35411551695391413}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25143431412404493, "res": {"No": 0.7485467940318089, "Yes": 0.25143431412404493}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3418338768958741, "res": {"No": 0.6581408781770844, "Yes": 0.3418338768958741}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19479477704833703, "res": {"No": 0.805193522366382, "Yes": 0.19479477704833703}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28526414596669436, "res": {"No": 0.7147165274543549, "Yes": 0.28526414596669436}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40560605431679975, "res": {"No": 0.5943759781533303, "Yes": 0.40560605431679975}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42359926516260693, "res": {"No": 0.5763823709930365, "Yes": 0.42359926516260693}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38066844721415105, "res": {"No": 0.6193170749792952, "Yes": 0.38066844721415105}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30172695291479756, "res": {"No": 0.6982562662331382, "Yes": 0.30172695291479756}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41676673591215935, "res": {"No": 0.5832071140847698, "Yes": 0.41676673591215935}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3475953024231394, "res": {"No": 0.6523804336661114, "Yes": 0.3475953024231394}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3682926545253616, "res": {"No": 0.6316883775075666, "Yes": 0.3682926545253616}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3120753855517435, "res": {"No": 0.6879045129484744, "Yes": 0.3120753855517435}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40141306282017564, "res": {"No": 0.5985659068480557, "Yes": 0.40141306282017564}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33274817710296944, "res": {"No": 0.6672315512579579, "Yes": 0.33274817710296944}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2519223938029187, "res": {"No": 0.7480554520490623, "Yes": 0.2519223938029187}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3354106758154043, "res": {"No": 0.6645624000304237, "Yes": 0.3354106758154043}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33319205950194114, "res": {"No": 0.6667722004242587, "Yes": 0.33319205950194114}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4434344897914088, "res": {"No": 0.5565459177445988, "Yes": 0.4434344897914088}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5251393620986939, "res": {"Yes": 0.5251393620986939, "No": 0.47484031994578213}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39527600944902175, "res": {"No": 0.6047056731423976, "Yes": 0.39527600944902175}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4843375490277427, "res": {"No": 0.5156375772233175, "Yes": 0.4843375490277427}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5068573377984823, "res": {"Yes": 0.5068573377984823, "No": 0.49311895303357384}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2080949319074438, "res": {"No": 0.7918926573517548, "Yes": 0.2080949319074438}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2421782630079391, "res": {"No": 0.7578027435422954, "Yes": 0.2421782630079391}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33487632313304133, "res": {"No": 0.6651026562096947, "Yes": 0.33487632313304133}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.18923289004976704, "res": {"No": 0.8107543487082237, "Yes": 0.18923289004976704}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1652666085009342, "res": {"No": 0.83471445723391, "Yes": 0.1652666085009342}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11353832884423799, "res": {"No": 0.886451601495478, "Yes": 0.11353832884423799}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25416454671421396, "res": {"No": 0.7458139027644473, "Yes": 0.25416454671421396}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3426067469197931, "res": {"No": 0.6573744393071085, "Yes": 0.3426067469197931}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3225855843897813, "res": {"No": 0.6773989423074265, "Yes": 0.3225855843897813}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.17031784689577162, "res": {"No": 0.8296707115360358, "Yes": 0.17031784689577162}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35437340691081104, "res": {"No": 0.6456072514369666, "Yes": 0.35437340691081104}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3121417710460974, "res": {"No": 0.6878434985242057, "Yes": 0.3121417710460974}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.510750891404788, "res": {"Yes": 0.510750891404788, "No": 0.4892357986025018}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2544988147941117, "res": {"No": 0.7454811948336931, "Yes": 0.2544988147941117}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21695472234515562, "res": {"No": 0.78302791678987, "Yes": 0.21695472234515562}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3916177666393944, "res": {"No": 0.6083685152611111, "Yes": 0.3916177666393944}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3571894377449459, "res": {"No": 0.6427895514472717, "Yes": 0.3571894377449459}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36885522725081527, "res": {"No": 0.6311310826877664, "Yes": 0.36885522725081527}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47994994584848816, "res": {"No": 0.520032640075821, "Yes": 0.47994994584848816}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39305406759423633, "res": {"No": 0.6069317879120216, "Yes": 0.39305406759423633}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37083405779769424, "res": {"No": 0.6291481987109969, "Yes": 0.37083405779769424}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4951720307112325, "res": {"No": 0.5047987923962359, "Yes": 0.4951720307112325}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39512915217168254, "res": {"No": 0.604847735189553, "Yes": 0.39512915217168254}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5339157841731509, "res": {"Yes": 0.5339157841731509, "No": 0.46606105226210903}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5163800105478813, "res": {"Yes": 0.5163800105478813, "No": 0.48359754343502853}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2218147954552631, "res": {"No": 0.7781669415457289, "Yes": 0.2218147954552631}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4659009877833051, "res": {"No": 0.5340786479828201, "Yes": 0.4659009877833051}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2988786920865449, "res": {"No": 0.7011058323133554, "Yes": 0.2988786920865449}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4414849534675834, "res": {"No": 0.5584971295814807, "Yes": 0.4414849534675834}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36075623429300085, "res": {"No": 0.6392285438129226, "Yes": 0.36075623429300085}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2565703969447667, "res": {"No": 0.7434044166409736, "Yes": 0.2565703969447667}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40451419111354076, "res": {"No": 0.5954627408418903, "Yes": 0.40451419111354076}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35445155486252994, "res": {"No": 0.6455242510484437, "Yes": 0.35445155486252994}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3895613564070942, "res": {"No": 0.6104196790100718, "Yes": 0.3895613564070942}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2532294460139941, "res": {"No": 0.7467540264016641, "Yes": 0.2532294460139941}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42401600789225313, "res": {"No": 0.5759444868081488, "Yes": 0.42401600789225313}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4520755215642435, "res": {"No": 0.5478874753890066, "Yes": 0.4520755215642435}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41534007354314056, "res": {"No": 0.5846306901454505, "Yes": 0.41534007354314056}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4109322170937638, "res": {"No": 0.5890341779112769, "Yes": 0.4109322170937638}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41901320669426995, "res": {"No": 0.5809438735388338, "Yes": 0.41901320669426995}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1536403649729346, "res": {"No": 0.8463431051083448, "Yes": 0.1536403649729346}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5208291281626481, "res": {"Yes": 0.5208291281626481, "No": 0.4791472392378256}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43372629522177925, "res": {"No": 0.5662578721669158, "Yes": 0.43372629522177925}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5439598391191642, "res": {"Yes": 0.5439598391191642, "No": 0.45602112216647445}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45824561102272765, "res": {"No": 0.5417350687987146, "Yes": 0.45824561102272765}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36967002336609545, "res": {"No": 0.630314088914935, "Yes": 0.36967002336609545}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3442563829269575, "res": {"No": 0.655723180527661, "Yes": 0.3442563829269575}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.430477250877821, "res": {"No": 0.5695037963536991, "Yes": 0.430477250877821}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40836047805970466, "res": {"No": 0.5916210898187775, "Yes": 0.40836047805970466}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3652906284147966, "res": {"No": 0.6346870412527585, "Yes": 0.3652906284147966}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0667190873388016, "res": {"No": 0.9332674530463778, "Yes": 0.0667190873388016}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40938602158471826, "res": {"No": 0.5905940406825348, "Yes": 0.40938602158471826}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4339103339626186, "res": {"No": 0.5660682641571606, "Yes": 0.4339103339626186}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4746041219096322, "res": {"No": 0.5253759381403085, "Yes": 0.4746041219096322}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40726728059498607, "res": {"No": 0.5927094300436485, "Yes": 0.40726728059498607}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03309789479438069, "res": {"No": 0.966885933150555, "Yes": 0.03309789479438069}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23366016629427705, "res": {"No": 0.7663161775344894, "Yes": 0.23366016629427705}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26174686667009694, "res": {"No": 0.7382307528612744, "Yes": 0.26174686667009694}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17445589848961218, "res": {"No": 0.8255253974116049, "Yes": 0.17445589848961218}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2597279349157161, "res": {"No": 0.7402475364524224, "Yes": 0.2597279349157161}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12968718467263482, "res": {"No": 0.870295595585303, "Yes": 0.12968718467263482}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4019529769363967, "res": {"No": 0.598021244346191, "Yes": 0.4019529769363967}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35414908869619677, "res": {"No": 0.6458264916842614, "Yes": 0.35414908869619677}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3518211954200298, "res": {"No": 0.6481568431623984, "Yes": 0.3518211954200298}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3949070729946376, "res": {"No": 0.6050684117931542, "Yes": 0.3949070729946376}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09707733986229887, "res": {"No": 0.9029092635023926, "Yes": 0.09707733986229887}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4033871331417863, "res": {"No": 0.5965884122919632, "Yes": 0.4033871331417863}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4323169972066266, "res": {"No": 0.5676654842111689, "Yes": 0.4323169972066266}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41053684406515734, "res": {"No": 0.5894469821675514, "Yes": 0.41053684406515734}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44282162163164956, "res": {"No": 0.5571564827504525, "Yes": 0.44282162163164956}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1294133802525711, "res": {"No": 0.8705758584782715, "Yes": 0.1294133802525711}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47740824241910707, "res": {"No": 0.5225632237871153, "Yes": 0.47740824241910707}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46695550697293525, "res": {"No": 0.5330125015636943, "Yes": 0.46695550697293525}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4965245254699346, "res": {"No": 0.503438288187826, "Yes": 0.4965245254699346}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47096028929298217, "res": {"No": 0.5290071135763386, "Yes": 0.47096028929298217}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3930627148788432, "res": {"No": 0.6069185144589657, "Yes": 0.3930627148788432}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43245836203334237, "res": {"No": 0.5675241077001232, "Yes": 0.43245836203334237}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3518739725575291, "res": {"No": 0.6481106377080623, "Yes": 0.3518739725575291}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24388606730809292, "res": {"No": 0.7560993243628857, "Yes": 0.24388606730809292}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45101602859005807, "res": {"No": 0.5489626074959243, "Yes": 0.45101602859005807}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45401862388910036, "res": {"No": 0.5459457866591778, "Yes": 0.45401862388910036}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28440207073902635, "res": {"No": 0.7155665302726633, "Yes": 0.28440207073902635}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2476459930425115, "res": {"No": 0.7523218760103859, "Yes": 0.2476459930425115}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3220012987695679, "res": {"No": 0.6779657422318127, "Yes": 0.3220012987695679}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18211917472792286, "res": {"No": 0.8178518183096084, "Yes": 0.18211917472792286}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4270736919349191, "res": {"No": 0.5729130837918813, "Yes": 0.4270736919349191}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5254440680553146, "res": {"Yes": 0.5254440680553146, "No": 0.47452987590280477}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43982832441092573, "res": {"No": 0.5601506576038292, "Yes": 0.43982832441092573}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.500815404442394, "res": {"Yes": 0.500815404442394, "No": 0.4991627171062291}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36238941981879047, "res": {"No": 0.6375934209622526, "Yes": 0.36238941981879047}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4842284884923788, "res": {"No": 0.5157535726284965, "Yes": 0.4842284884923788}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36335974587514847, "res": {"No": 0.6366163634620998, "Yes": 0.36335974587514847}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4796112495474151, "res": {"No": 0.5203589328721887, "Yes": 0.4796112495474151}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.433514254911497, "res": {"No": 0.5664616144187707, "Yes": 0.433514254911497}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3935968015422653, "res": {"No": 0.6063768872946477, "Yes": 0.3935968015422653}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3609488212825099, "res": {"No": 0.6390321071089385, "Yes": 0.3609488212825099}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6995039416072878, "res": {"Yes": 0.6995039416072878, "No": 0.30047554780947777}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6027067011530679, "res": {"Yes": 0.6027067011530679, "No": 0.3972706470353654}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6266943545560975, "res": {"Yes": 0.6266943545560975, "No": 0.3732824153940766}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4301152426667635, "res": {"No": 0.5698644804602002, "Yes": 0.4301152426667635}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29179362216600996, "res": {"No": 0.7081871103846294, "Yes": 0.29179362216600996}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5928357498820916, "res": {"Yes": 0.5928357498820916, "No": 0.40713970374193326}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4308303697563441, "res": {"No": 0.5691475294452523, "Yes": 0.4308303697563441}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5838450072049602, "res": {"Yes": 0.5838450072049602, "No": 0.4161314013362923}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5397019695618581, "res": {"Yes": 0.5397019695618581, "No": 0.4602653076489903}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42620716419138216, "res": {"No": 0.5737749687705221, "Yes": 0.42620716419138216}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36459110997090405, "res": {"No": 0.6353786236906087, "Yes": 0.36459110997090405}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30731718157588817, "res": {"No": 0.6926609701932412, "Yes": 0.30731718157588817}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31740967160778205, "res": {"No": 0.6825718791722631, "Yes": 0.31740967160778205}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25781146938450933, "res": {"No": 0.7421603604205332, "Yes": 0.25781146938450933}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34082501019405675, "res": {"No": 0.6591501007037306, "Yes": 0.34082501019405675}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4191775256033245, "res": {"No": 0.5808026806720629, "Yes": 0.4191775256033245}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3694452399435716, "res": {"No": 0.6305366668965464, "Yes": 0.3694452399435716}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41740141262550434, "res": {"No": 0.5825776486632966, "Yes": 0.41740141262550434}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4462717980100159, "res": {"No": 0.5537092607947793, "Yes": 0.4462717980100159}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3298746319120397, "res": {"No": 0.6701134852301063, "Yes": 0.3298746319120397}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37135461400296205, "res": {"No": 0.6286269389905218, "Yes": 0.37135461400296205}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3875869532026234, "res": {"No": 0.6123916378567196, "Yes": 0.3875869532026234}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39629610787190456, "res": {"No": 0.6036838169395282, "Yes": 0.39629610787190456}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3547797871041582, "res": {"No": 0.6452045375340505, "Yes": 0.3547797871041582}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22269499035722698, "res": {"No": 0.7772788987258592, "Yes": 0.22269499035722698}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4043882571369023, "res": {"No": 0.5955839953544498, "Yes": 0.4043882571369023}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34301321666758505, "res": {"No": 0.6569690957737403, "Yes": 0.34301321666758505}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4745083469707226, "res": {"No": 0.5254643295692184, "Yes": 0.4745083469707226}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24637832941333812, "res": {"No": 0.7536063496493299, "Yes": 0.24637832941333812}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3593463472501378, "res": {"No": 0.6406358693558006, "Yes": 0.3593463472501378}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2903591714779398, "res": {"No": 0.7096315409923717, "Yes": 0.2903591714779398}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44442876303946405, "res": {"No": 0.5555598138508302, "Yes": 0.44442876303946405}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3395164091048987, "res": {"No": 0.6604742377129815, "Yes": 0.3395164091048987}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23410262298469675, "res": {"No": 0.7658876967322203, "Yes": 0.23410262298469675}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1968639495987016, "res": {"No": 0.8030857850853222, "Yes": 0.1968639495987016}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.16156175912121248, "res": {"No": 0.8384100318642549, "Yes": 0.16156175912121248}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.277391198543161, "res": {"No": 0.72257318500564, "Yes": 0.277391198543161}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.12446039825285614, "res": {"No": 0.8755134541362062, "Yes": 0.12446039825285614}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19344098502136012, "res": {"No": 0.8065239303527304, "Yes": 0.19344098502136012}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24818463843712418, "res": {"No": 0.7518014418425609, "Yes": 0.24818463843712418}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3164952960213941, "res": {"No": 0.6834818484072224, "Yes": 0.3164952960213941}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.380791042190886, "res": {"No": 0.6191904995056235, "Yes": 0.380791042190886}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.434361170876918, "res": {"No": 0.565617626859176, "Yes": 0.434361170876918}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.12071884888264968, "res": {"No": 0.8792652882048712, "Yes": 0.12071884888264968}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22227542787421536, "res": {"No": 0.7777079584635405, "Yes": 0.22227542787421536}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.13739659241149588, "res": {"No": 0.8625823382460318, "Yes": 0.13739659241149588}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3281035563001215, "res": {"No": 0.6718701743867469, "Yes": 0.3281035563001215}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32823111494187135, "res": {"No": 0.6717443247350546, "Yes": 0.32823111494187135}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2929504783852035, "res": {"No": 0.7070270666595883, "Yes": 0.2929504783852035}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25889396568728407, "res": {"No": 0.7410935080108318, "Yes": 0.25889396568728407}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33930214003697834, "res": {"No": 0.6606783491802697, "Yes": 0.33930214003697834}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32241101606413086, "res": {"No": 0.6775710234921578, "Yes": 0.32241101606413086}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3040867671631908, "res": {"No": 0.695896943009409, "Yes": 0.3040867671631908}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3563800005997406, "res": {"No": 0.6436023451950845, "Yes": 0.3563800005997406}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3676859139309803, "res": {"No": 0.6322918079221479, "Yes": 0.3676859139309803}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3002536684835295, "res": {"No": 0.6997316672006106, "Yes": 0.3002536684835295}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3698892435175645, "res": {"No": 0.630078269507426, "Yes": 0.3698892435175645}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35981919852834826, "res": {"No": 0.6401453813919923, "Yes": 0.35981919852834826}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29854309043551425, "res": {"No": 0.7014308631740326, "Yes": 0.29854309043551425}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.042669572524788965, "res": {"No": 0.957313880021879, "Yes": 0.042669572524788965}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4208699922489831, "res": {"No": 0.5791053594507527, "Yes": 0.4208699922489831}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3914497595042382, "res": {"No": 0.6085263891206453, "Yes": 0.3914497595042382}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38460624819599365, "res": {"No": 0.6153720713781278, "Yes": 0.38460624819599365}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38549070415688974, "res": {"No": 0.6144891971702385, "Yes": 0.38549070415688974}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47389827027222964, "res": {"No": 0.5260825707248075, "Yes": 0.47389827027222964}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48515669876012496, "res": {"No": 0.5148169903587002, "Yes": 0.48515669876012496}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5580631951815131, "res": {"Yes": 0.5580631951815131, "No": 0.44191841296260354}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5723237793112468, "res": {"Yes": 0.5723237793112468, "No": 0.42765441121340453}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4585561638600648, "res": {"No": 0.5414243103813542, "Yes": 0.4585561638600648}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3231314734604387, "res": {"No": 0.6768531990377887, "Yes": 0.3231314734604387}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4502166804483869, "res": {"No": 0.549761022349106, "Yes": 0.4502166804483869}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4907424355521968, "res": {"No": 0.5092422716171003, "Yes": 0.4907424355521968}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5590133076157129, "res": {"Yes": 0.5590133076157129, "No": 0.4409676774436424}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.544448784892438, "res": {"Yes": 0.544448784892438, "No": 0.45553522166119953}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5955976939438762, "res": {"Yes": 0.5955976939438762, "No": 0.404381354288269}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4126593973621145, "res": {"No": 0.5873155145881558, "Yes": 0.4126593973621145}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28649052927657226, "res": {"No": 0.7134921958262338, "Yes": 0.28649052927657226}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3933588419020531, "res": {"No": 0.6066197474126206, "Yes": 0.3933588419020531}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2849304966096714, "res": {"No": 0.7150511645765643, "Yes": 0.2849304966096714}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.457684544337287, "res": {"No": 0.5422929908910727, "Yes": 0.457684544337287}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4305412417720769, "res": {"No": 0.5694450721500911, "Yes": 0.4305412417720769}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5220471862865161, "res": {"Yes": 0.5220471862865161, "No": 0.47793121429226054}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4237316012962545, "res": {"No": 0.5762480088702644, "Yes": 0.4237316012962545}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32059542789402495, "res": {"No": 0.6793856941985084, "Yes": 0.32059542789402495}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16739704054930024, "res": {"No": 0.8325763913133456, "Yes": 0.16739704054930024}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4068314732212296, "res": {"No": 0.5931478592513841, "Yes": 0.4068314732212296}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31961568158747566, "res": {"No": 0.6803677487568621, "Yes": 0.31961568158747566}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4992607872546295, "res": {"No": 0.5007122470946506, "Yes": 0.4992607872546295}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37401197625803256, "res": {"No": 0.6259699952132584, "Yes": 0.37401197625803256}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12296739992494153, "res": {"No": 0.8770188439593753, "Yes": 0.12296739992494153}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3745262443908517, "res": {"No": 0.6254501553126306, "Yes": 0.3745262443908517}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43821413297576767, "res": {"No": 0.5617665763167705, "Yes": 0.43821413297576767}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39241267130714585, "res": {"No": 0.6075639146506879, "Yes": 0.39241267130714585}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42229216938153924, "res": {"No": 0.5776796515661804, "Yes": 0.42229216938153924}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29947552029775815, "res": {"No": 0.7005068951990907, "Yes": 0.29947552029775815}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5104821335158243, "res": {"Yes": 0.5104821335158243, "No": 0.4895009187980268}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48781385039974234, "res": {"No": 0.5121643855222369, "Yes": 0.48781385039974234}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4811446146046647, "res": {"No": 0.5188312620347348, "Yes": 0.4811446146046647}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44683819042655115, "res": {"No": 0.5531381995010217, "Yes": 0.44683819042655115}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42164375665843357, "res": {"No": 0.578338234884842, "Yes": 0.42164375665843357}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32162956980275575, "res": {"No": 0.67835301869869, "Yes": 0.32162956980275575}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3341615233698138, "res": {"No": 0.6658193776194595, "Yes": 0.3341615233698138}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28434946122246996, "res": {"No": 0.7156361368143221, "Yes": 0.28434946122246996}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1623487467331807, "res": {"No": 0.8376326246744105, "Yes": 0.1623487467331807}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41531986358687417, "res": {"No": 0.5846613431360961, "Yes": 0.41531986358687417}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33815608505087574, "res": {"No": 0.6618237084133995, "Yes": 0.33815608505087574}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3546169805488401, "res": {"No": 0.6453661944230901, "Yes": 0.3546169805488401}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43072174526625767, "res": {"No": 0.5692612735604261, "Yes": 0.43072174526625767}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38525168153590134, "res": {"No": 0.6147328105306452, "Yes": 0.38525168153590134}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34219155376096533, "res": {"No": 0.6577936491210975, "Yes": 0.34219155376096533}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5094065796753888, "res": {"Yes": 0.5094065796753888, "No": 0.49057596960613453}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49169066003294715, "res": {"No": 0.5082903701084548, "Yes": 0.49169066003294715}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5128746041733759, "res": {"Yes": 0.5128746041733759, "No": 0.4871040128289609}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49877406565614796, "res": {"No": 0.5012001483352861, "Yes": 0.49877406565614796}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.006754265822535668, "res": {"No": 0.9932387901319466, "Yes": 0.006754265822535668}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3321143970461308, "res": {"No": 0.6678636386774544, "Yes": 0.3321143970461308}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27776967941307046, "res": {"No": 0.7222105515193061, "Yes": 0.27776967941307046}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.21222999718468147, "res": {"No": 0.7877514008923249, "Yes": 0.21222999718468147}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1650509607065144, "res": {"No": 0.834934875989355, "Yes": 0.1650509607065144}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3392771683184421, "res": {"No": 0.6607028079454853, "Yes": 0.3392771683184421}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46436523128275903, "res": {"No": 0.5356098043361728, "Yes": 0.46436523128275903}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6065221683428372, "res": {"Yes": 0.6065221683428372, "No": 0.39344487888857865}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5474597149492457, "res": {"Yes": 0.5474597149492457, "No": 0.45251419326616643}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.527315625420133, "res": {"Yes": 0.527315625420133, "No": 0.47265382475161816}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2997722677781569, "res": {"No": 0.7002133532645186, "Yes": 0.2997722677781569}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34150184409517537, "res": {"No": 0.6584667188015703, "Yes": 0.34150184409517537}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2987103811285243, "res": {"No": 0.7012645666181326, "Yes": 0.2987103811285243}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29730313622772664, "res": {"No": 0.7026706564115932, "Yes": 0.29730313622772664}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2887607755270008, "res": {"No": 0.7112194549873652, "Yes": 0.2887607755270008}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37215375135648643, "res": {"No": 0.6278277396885641, "Yes": 0.37215375135648643}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34731400315160765, "res": {"No": 0.6526611937923033, "Yes": 0.34731400315160765}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4192354012475731, "res": {"No": 0.5807399573696697, "Yes": 0.4192354012475731}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3580372312800093, "res": {"No": 0.6419403643675371, "Yes": 0.3580372312800093}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2997904345280313, "res": {"No": 0.7001849251794701, "Yes": 0.2997904345280313}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3075441576289455, "res": {"No": 0.6924351302760611, "Yes": 0.3075441576289455}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3327428531747274, "res": {"No": 0.6672399851180676, "Yes": 0.3327428531747274}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40780314857143357, "res": {"No": 0.5921765276201202, "Yes": 0.40780314857143357}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3873044278176224, "res": {"No": 0.6126751734397974, "Yes": 0.3873044278176224}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4253621522360409, "res": {"No": 0.5746122577490136, "Yes": 0.4253621522360409}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10204683545063421, "res": {"No": 0.8979432218656241, "Yes": 0.10204683545063421}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25768908955411635, "res": {"No": 0.7422907249145122, "Yes": 0.25768908955411635}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4155770677237462, "res": {"No": 0.5844055513036744, "Yes": 0.4155770677237462}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41165924380238555, "res": {"No": 0.5883164238660813, "Yes": 0.41165924380238555}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2988633002302461, "res": {"No": 0.7011231989199064, "Yes": 0.2988633002302461}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2399186191543424, "res": {"No": 0.7600639534240029, "Yes": 0.2399186191543424}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2866328644684825, "res": {"No": 0.7133499111317996, "Yes": 0.2866328644684825}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2968471259910124, "res": {"No": 0.7031321674040439, "Yes": 0.2968471259910124}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24342928239882605, "res": {"No": 0.7565525224058768, "Yes": 0.24342928239882605}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34539513447757536, "res": {"No": 0.6545890007930069, "Yes": 0.34539513447757536}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37925239336081507, "res": {"No": 0.6207244222711132, "Yes": 0.37925239336081507}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1866609704188437, "res": {"No": 0.8133244105975168, "Yes": 0.1866609704188437}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4649868097156557, "res": {"No": 0.5349924202197748, "Yes": 0.4649868097156557}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3922181456389941, "res": {"No": 0.6077574910425497, "Yes": 0.3922181456389941}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19071612748977018, "res": {"No": 0.8092702688715805, "Yes": 0.19071612748977018}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09893314104244151, "res": {"No": 0.9010563695610533, "Yes": 0.09893314104244151}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4313836696737767, "res": {"No": 0.568590777249312, "Yes": 0.4313836696737767}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4300413122110538, "res": {"No": 0.569940431360387, "Yes": 0.4300413122110538}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43401336084432673, "res": {"No": 0.5659694372660774, "Yes": 0.43401336084432673}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2276023617559928, "res": {"No": 0.7723811601793829, "Yes": 0.2276023617559928}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21028021091744126, "res": {"No": 0.7897042031725374, "Yes": 0.21028021091744126}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26655350134739425, "res": {"No": 0.7334301898185107, "Yes": 0.26655350134739425}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29130655869821304, "res": {"No": 0.7086723280489469, "Yes": 0.29130655869821304}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25763420762317446, "res": {"No": 0.7423491826108408, "Yes": 0.25763420762317446}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3654429498269748, "res": {"No": 0.63453643519575, "Yes": 0.3654429498269748}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17191319668391977, "res": {"No": 0.8280773343374629, "Yes": 0.17191319668391977}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5114036391992994, "res": {"Yes": 0.5114036391992994, "No": 0.48856961987396713}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7657384321083068, "res": {"Yes": 0.7657384321083068, "No": 0.23424411052858685}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6020778036986568, "res": {"Yes": 0.6020778036986568, "No": 0.3978997857505059}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29216236112264865, "res": {"No": 0.707819968077553, "Yes": 0.29216236112264865}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3973462667537379, "res": {"No": 0.6025972595537366, "Yes": 0.3973462667537379}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17793648824026576, "res": {"No": 0.8220373968577953, "Yes": 0.17793648824026576}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19111250364937063, "res": {"No": 0.8088667055185348, "Yes": 0.19111250364937063}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17952923773508345, "res": {"No": 0.8204524357235596, "Yes": 0.17952923773508345}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.08500092841687507, "res": {"No": 0.9149727001988178, "Yes": 0.08500092841687507}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.010425103325786103, "res": {"No": 0.9895542062348452, "Yes": 0.010425103325786103}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20093166577400562, "res": {"No": 0.7990287431164621, "Yes": 0.20093166577400562}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.15337778776182912, "res": {"No": 0.8465903919024944, "Yes": 0.15337778776182912}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1060198752593244, "res": {"No": 0.8939581651837716, "Yes": 0.1060198752593244}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.07531578786133333, "res": {"No": 0.9246615872790023, "Yes": 0.07531578786133333}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3172826379437216, "res": {"No": 0.6827009930274022, "Yes": 0.3172826379437216}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3439458660932714, "res": {"No": 0.6560294354053384, "Yes": 0.3439458660932714}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3812618765268038, "res": {"No": 0.6187145869185946, "Yes": 0.3812618765268038}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3133067614077752, "res": {"No": 0.6866704473721538, "Yes": 0.3133067614077752}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39972361228138464, "res": {"No": 0.6002565390875528, "Yes": 0.39972361228138464}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30531379353395877, "res": {"No": 0.6946640024701908, "Yes": 0.30531379353395877}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5206814211375367, "res": {"Yes": 0.5206814211375367, "No": 0.4792973987580318}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43115867550813913, "res": {"No": 0.5688158415810756, "Yes": 0.43115867550813913}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5461275622664278, "res": {"Yes": 0.5461275622664278, "No": 0.4538459298957623}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4755747340242607, "res": {"No": 0.5244019544909977, "Yes": 0.4755747340242607}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1391163980818205, "res": {"No": 0.8608677558683132, "Yes": 0.1391163980818205}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40913588948170165, "res": {"No": 0.5908436784688763, "Yes": 0.40913588948170165}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3850378453172923, "res": {"No": 0.6149461352320161, "Yes": 0.3850378453172923}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43080584758960505, "res": {"No": 0.5691780934882208, "Yes": 0.43080584758960505}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24372600864990843, "res": {"No": 0.7562635140337912, "Yes": 0.24372600864990843}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5257393612405878, "res": {"Yes": 0.5257393612405878, "No": 0.47424989100171866}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4623672177530412, "res": {"No": 0.5376100154593635, "Yes": 0.4623672177530412}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.355267092200917, "res": {"No": 0.6447132852541702, "Yes": 0.355267092200917}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4229134630259055, "res": {"No": 0.5770671162006696, "Yes": 0.4229134630259055}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5224163519087184, "res": {"Yes": 0.5224163519087184, "No": 0.4775590795063508}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3662823481343282, "res": {"No": 0.6336872137439911, "Yes": 0.3662823481343282}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.16792665034632956, "res": {"No": 0.8320527489419678, "Yes": 0.16792665034632956}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23086748293921017, "res": {"No": 0.7690996915287639, "Yes": 0.23086748293921017}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22678546420257836, "res": {"No": 0.7731886585133843, "Yes": 0.22678546420257836}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29113403979798375, "res": {"No": 0.7088365110094965, "Yes": 0.29113403979798375}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.195662586633073, "res": {"No": 0.8043285250633199, "Yes": 0.195662586633073}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33642625734184795, "res": {"No": 0.6635567484034269, "Yes": 0.33642625734184795}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4265004316397154, "res": {"No": 0.5734813514668945, "Yes": 0.4265004316397154}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36919474866703667, "res": {"No": 0.6307873802741006, "Yes": 0.36919474866703667}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.433324763905002, "res": {"No": 0.5666548051008449, "Yes": 0.433324763905002}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18947397480736777, "res": {"No": 0.8105170026904313, "Yes": 0.18947397480736777}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26830577429415053, "res": {"No": 0.7316812431783439, "Yes": 0.26830577429415053}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2884493706833329, "res": {"No": 0.7115387219169036, "Yes": 0.2884493706833329}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36263734271057424, "res": {"No": 0.6373497347051615, "Yes": 0.36263734271057424}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.366098886638855, "res": {"No": 0.6338883145998756, "Yes": 0.366098886638855}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23679130390991615, "res": {"No": 0.7631932066710738, "Yes": 0.23679130390991615}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28096971254357506, "res": {"No": 0.7190101820188189, "Yes": 0.28096971254357506}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3349803518738335, "res": {"No": 0.665002246590773, "Yes": 0.3349803518738335}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3375098783205359, "res": {"No": 0.6624679298944111, "Yes": 0.3375098783205359}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33504846030235663, "res": {"No": 0.6649281894145206, "Yes": 0.33504846030235663}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5178863512113856, "res": {"Yes": 0.5178863512113856, "No": 0.4820846008007124}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5739149295529289, "res": {"Yes": 0.5739149295529289, "No": 0.4260607232630615}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32064166108806047, "res": {"No": 0.6793372013783858, "Yes": 0.32064166108806047}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2863923085149953, "res": {"No": 0.713587845865885, "Yes": 0.2863923085149953}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19128326246737987, "res": {"No": 0.8086989801331907, "Yes": 0.19128326246737987}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5255333117993105, "res": {"Yes": 0.5255333117993105, "No": 0.47445115792622916}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2649144858443785, "res": {"No": 0.7350733417893883, "Yes": 0.2649144858443785}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4122527693161678, "res": {"No": 0.5877302646043846, "Yes": 0.4122527693161678}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2733890791709909, "res": {"No": 0.7265971075528699, "Yes": 0.2733890791709909}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39176406319441515, "res": {"No": 0.6082140092813249, "Yes": 0.39176406319441515}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1576489426435876, "res": {"No": 0.8423357426308479, "Yes": 0.1576489426435876}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34176490177933977, "res": {"No": 0.6582113951683001, "Yes": 0.34176490177933977}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3021828141158576, "res": {"No": 0.6977922895127191, "Yes": 0.3021828141158576}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31421211363657364, "res": {"No": 0.6857541687996754, "Yes": 0.31421211363657364}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3817891179733112, "res": {"No": 0.6181808294726912, "Yes": 0.3817891179733112}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08800190649699748, "res": {"No": 0.9119809292833142, "Yes": 0.08800190649699748}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4033724904546197, "res": {"No": 0.5966031661058294, "Yes": 0.4033724904546197}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4320958242248975, "res": {"No": 0.5678871838334978, "Yes": 0.4320958242248975}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3914549853934103, "res": {"No": 0.6085263891206453, "Yes": 0.3914549853934103}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3919444101851258, "res": {"No": 0.6080322656018218, "Yes": 0.3919444101851258}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20218420321551436, "res": {"No": 0.7978000315217414, "Yes": 0.20218420321551436}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28777018101228957, "res": {"No": 0.7122041009580179, "Yes": 0.28777018101228957}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30026297649152583, "res": {"No": 0.6997134044424267, "Yes": 0.30026297649152583}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3715719720280354, "res": {"No": 0.6284060971438407, "Yes": 0.3715719720280354}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23277110272911164, "res": {"No": 0.7672160234561112, "Yes": 0.23277110272911164}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.385202041202614, "res": {"No": 0.6147790524717909, "Yes": 0.385202041202614}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17322913171993778, "res": {"No": 0.826756330548391, "Yes": 0.17322913171993778}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4202554240729098, "res": {"No": 0.5797218844570234, "Yes": 0.4202554240729098}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4040406448569015, "res": {"No": 0.59594207871708, "Yes": 0.4040406448569015}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3686446157388788, "res": {"No": 0.6313424523173896, "Yes": 0.3686446157388788}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21633760616354797, "res": {"No": 0.7836443396240691, "Yes": 0.21633760616354797}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29392445513066756, "res": {"No": 0.7060624989373685, "Yes": 0.29392445513066756}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43820545642183295, "res": {"No": 0.5617747556976674, "Yes": 0.43820545642183295}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4528174340834068, "res": {"No": 0.5471592967690224, "Yes": 0.4528174340834068}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45456259139769906, "res": {"No": 0.54541789408797, "Yes": 0.45456259139769906}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15335117902402717, "res": {"No": 0.8466373874394979, "Yes": 0.15335117902402717}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.10225332154900427, "res": {"No": 0.897735555207339, "Yes": 0.10225332154900427}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4385690811049335, "res": {"No": 0.561416850704401, "Yes": 0.4385690811049335}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43459713649948606, "res": {"No": 0.5653919056347254, "Yes": 0.43459713649948606}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5178895931900915, "res": {"Yes": 0.5178895931900915, "No": 0.48209497537295165}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17370936367855871, "res": {"No": 0.8262761824729996, "Yes": 0.17370936367855871}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.259119923331343, "res": {"No": 0.7408551513905091, "Yes": 0.259119923331343}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23785474660666164, "res": {"No": 0.7621273585931615, "Yes": 0.23785474660666164}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1816998371205611, "res": {"No": 0.8182862865924626, "Yes": 0.1816998371205611}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32786442318379394, "res": {"No": 0.6721095035653685, "Yes": 0.32786442318379394}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5430363871322934, "res": {"Yes": 0.5430363871322934, "No": 0.45693460253587476}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47682248795883764, "res": {"No": 0.5231549276541876, "Yes": 0.47682248795883764}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5048361791792163, "res": {"Yes": 0.5048361791792163, "No": 0.49513882558823324}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6143258139273258, "res": {"Yes": 0.6143258139273258, "No": 0.3856507314253046}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40214560276797306, "res": {"No": 0.5978367234632224, "Yes": 0.40214560276797306}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2946706167035222, "res": {"No": 0.7052915327606986, "Yes": 0.2946706167035222}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4167073758860174, "res": {"No": 0.5832742159232045, "Yes": 0.4167073758860174}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31815413893677863, "res": {"No": 0.6818226833712959, "Yes": 0.31815413893677863}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.21231679577529278, "res": {"No": 0.7876708574294383, "Yes": 0.21231679577529278}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2547990968079968, "res": {"No": 0.7451829874680018, "Yes": 0.2547990968079968}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2685198603458955, "res": {"No": 0.7314610110079071, "Yes": 0.2685198603458955}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3963289621810394, "res": {"No": 0.6036417416438287, "Yes": 0.3963289621810394}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4548460813540732, "res": {"No": 0.5451192668519682, "Yes": 0.4548460813540732}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.430728486114318, "res": {"No": 0.5692463135707321, "Yes": 0.430728486114318}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39611967650042945, "res": {"No": 0.6038433312748795, "Yes": 0.39611967650042945}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2768237657502432, "res": {"No": 0.7231596754229576, "Yes": 0.2768237657502432}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33807365264625877, "res": {"No": 0.6619072093595819, "Yes": 0.33807365264625877}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2635414810633919, "res": {"No": 0.7364382198743249, "Yes": 0.2635414810633919}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3140164518561774, "res": {"No": 0.6859626217401452, "Yes": 0.3140164518561774}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33181476583526454, "res": {"No": 0.6681730380455189, "Yes": 0.33181476583526454}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2991192965642951, "res": {"No": 0.7008637051667923, "Yes": 0.2991192965642951}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49577402045380115, "res": {"No": 0.5042069903923758, "Yes": 0.49577402045380115}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3252719751493218, "res": {"No": 0.6747142169887916, "Yes": 0.3252719751493218}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44384822969351734, "res": {"No": 0.5561309449279049, "Yes": 0.44384822969351734}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3910955012888774, "res": {"No": 0.608887821127836, "Yes": 0.3910955012888774}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36960113438345993, "res": {"No": 0.6303781635986139, "Yes": 0.36960113438345993}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24791847914008044, "res": {"No": 0.7520615951067277, "Yes": 0.24791847914008044}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3357031807712757, "res": {"No": 0.6642746935157955, "Yes": 0.3357031807712757}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2534169811804553, "res": {"No": 0.7465705789414435, "Yes": 0.2534169811804553}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2147824335897598, "res": {"No": 0.7852010965952491, "Yes": 0.2147824335897598}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44744623219748425, "res": {"No": 0.5525349829710736, "Yes": 0.44744623219748425}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2640858075625937, "res": {"No": 0.7358973059115201, "Yes": 0.2640858075625937}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42914271773729246, "res": {"No": 0.5708339598311758, "Yes": 0.42914271773729246}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4366491529532654, "res": {"No": 0.5633334233163357, "Yes": 0.4366491529532654}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2817200870810536, "res": {"No": 0.718262196794964, "Yes": 0.2817200870810536}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2955011944535974, "res": {"No": 0.7044724881519816, "Yes": 0.2955011944535974}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.08654331836942405, "res": {"No": 0.9134421162293581, "Yes": 0.08654331836942405}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3982871943807238, "res": {"No": 0.6016854370842192, "Yes": 0.3982871943807238}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45451921001416856, "res": {"No": 0.5454532383126475, "Yes": 0.45451921001416856}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3386646819204854, "res": {"No": 0.6613100017762301, "Yes": 0.3386646819204854}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08256135211673983, "res": {"No": 0.9174252773044396, "Yes": 0.08256135211673983}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4865669846447602, "res": {"No": 0.5134194328376223, "Yes": 0.4865669846447602}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.517857485030677, "res": {"Yes": 0.517857485030677, "No": 0.48212763359270483}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5313320359783631, "res": {"Yes": 0.5313320359783631, "No": 0.46865388877524994}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43787062016166667, "res": {"No": 0.5621126524367421, "Yes": 0.43787062016166667}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5176120062829701, "res": {"Yes": 0.5176120062829701, "No": 0.4823597857100477}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4285023914413069, "res": {"No": 0.5714790239045241, "Yes": 0.4285023914413069}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4414458793636019, "res": {"No": 0.5585339692671073, "Yes": 0.4414458793636019}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4011701610148983, "res": {"No": 0.5988064529650442, "Yes": 0.4011701610148983}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4935765577112612, "res": {"No": 0.5064015486506271, "Yes": 0.4935765577112612}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18472768708618276, "res": {"No": 0.8152557918115091, "Yes": 0.18472768708618276}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5134244438357404, "res": {"Yes": 0.5134244438357404, "No": 0.48655224188847934}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5508735764416741, "res": {"Yes": 0.5508735764416741, "No": 0.44910680341733245}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.647971477371901, "res": {"Yes": 0.647971477371901, "No": 0.35199845239458416}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49590966294717087, "res": {"No": 0.5040660842311144, "Yes": 0.49590966294717087}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37680803959604453, "res": {"No": 0.6231526288945267, "Yes": 0.37680803959604453}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36769073064800223, "res": {"No": 0.6322735286302094, "Yes": 0.36769073064800223}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35806845348784533, "res": {"No": 0.6418960014019356, "Yes": 0.35806845348784533}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3521218142640406, "res": {"No": 0.6478438719557884, "Yes": 0.3521218142640406}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2325624006668322, "res": {"No": 0.7674160397249332, "Yes": 0.2325624006668322}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15726287669208605, "res": {"No": 0.8427205757861573, "Yes": 0.15726287669208605}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4071179793469536, "res": {"No": 0.5928538909335946, "Yes": 0.4071179793469536}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40194347889977144, "res": {"No": 0.5980299157171013, "Yes": 0.40194347889977144}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4328114916200606, "res": {"No": 0.5671658577226938, "Yes": 0.4328114916200606}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37556487990503645, "res": {"No": 0.624403553058942, "Yes": 0.37556487990503645}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3863051414206292, "res": {"No": 0.6136780910476762, "Yes": 0.3863051414206292}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29937638036650893, "res": {"No": 0.7006035998453051, "Yes": 0.29937638036650893}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31295530317402853, "res": {"No": 0.6870262485454068, "Yes": 0.31295530317402853}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3476046876230045, "res": {"No": 0.6523744513649636, "Yes": 0.3476046876230045}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3209362415613609, "res": {"No": 0.6790476073254056, "Yes": 0.3209362415613609}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24276052873537163, "res": {"No": 0.7572189754024425, "Yes": 0.24276052873537163}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3847724955523554, "res": {"No": 0.6152025597237656, "Yes": 0.3847724955523554}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29284667513068696, "res": {"No": 0.70713249225451, "Yes": 0.29284667513068696}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3683586953186982, "res": {"No": 0.6316100845854786, "Yes": 0.3683586953186982}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26830496937803505, "res": {"No": 0.7316748483122241, "Yes": 0.26830496937803505}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30101303786657224, "res": {"No": 0.6989704446319334, "Yes": 0.30101303786657224}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49717173962979944, "res": {"No": 0.5027972357469659, "Yes": 0.49717173962979944}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.464261707952002, "res": {"No": 0.535717183439798, "Yes": 0.464261707952002}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3928604182688387, "res": {"No": 0.6071177620908842, "Yes": 0.3928604182688387}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4091892974749526, "res": {"No": 0.5907878227478555, "Yes": 0.4091892974749526}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3299808026505446, "res": {"No": 0.6699828928384769, "Yes": 0.3299808026505446}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36514235052210914, "res": {"No": 0.6348301094874556, "Yes": 0.36514235052210914}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3372704999913715, "res": {"No": 0.6626929819699093, "Yes": 0.3372704999913715}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35259873382214973, "res": {"No": 0.6473703222392783, "Yes": 0.35259873382214973}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22581865800761217, "res": {"No": 0.7741672449401603, "Yes": 0.22581865800761217}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2628692023451029, "res": {"No": 0.7370929686795562, "Yes": 0.2628692023451029}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3228029189600691, "res": {"No": 0.6771639256521936, "Yes": 0.3228029189600691}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39283531529016, "res": {"No": 0.6071287813782659, "Yes": 0.39283531529016}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4311935274724036, "res": {"No": 0.5687750305085568, "Yes": 0.4311935274724036}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39299743258374653, "res": {"No": 0.606964708785004, "Yes": 0.39299743258374653}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2703044669335479, "res": {"No": 0.7296745253119336, "Yes": 0.2703044669335479}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.13815764372114858, "res": {"No": 0.8618259697004651, "Yes": 0.13815764372114858}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38846270935407656, "res": {"No": 0.6115142810490356, "Yes": 0.38846270935407656}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31776997722493694, "res": {"No": 0.6822085336915309, "Yes": 0.31776997722493694}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36283557818127415, "res": {"No": 0.637148593380693, "Yes": 0.36283557818127415}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3298097519530957, "res": {"No": 0.6701696430931502, "Yes": 0.3298097519530957}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2665253014787511, "res": {"No": 0.7334561096994205, "Yes": 0.2665253014787511}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34837869041863767, "res": {"No": 0.6515949190086775, "Yes": 0.34837869041863767}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4387964854810446, "res": {"No": 0.5611725191319142, "Yes": 0.4387964854810446}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4804138927810188, "res": {"No": 0.5195572876145131, "Yes": 0.4804138927810188}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3181153264994352, "res": {"No": 0.6818670237421072, "Yes": 0.3181153264994352}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40983993415969705, "res": {"No": 0.5901323766768739, "Yes": 0.40983993415969705}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.289212924260107, "res": {"No": 0.7107682725164461, "Yes": 0.289212924260107}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43973441349775605, "res": {"No": 0.5602383280417771, "Yes": 0.43973441349775605}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33324847369294136, "res": {"No": 0.6667337154435374, "Yes": 0.33324847369294136}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1761793268704458, "res": {"No": 0.8238076341736259, "Yes": 0.1761793268704458}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4072496870284881, "res": {"No": 0.5927290490504757, "Yes": 0.4072496870284881}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4509789746353464, "res": {"No": 0.5490002401725135, "Yes": 0.4509789746353464}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5259891467565407, "res": {"Yes": 0.5259891467565407, "No": 0.4739921825185326}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5836749754334909, "res": {"Yes": 0.5836749754334909, "No": 0.41629434867477566}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33879228093793085, "res": {"No": 0.6611895617413136, "Yes": 0.33879228093793085}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2376989314496223, "res": {"No": 0.7622835878484605, "Yes": 0.2376989314496223}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5573448000304119, "res": {"Yes": 0.5573448000304119, "No": 0.44263178485147525}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6026896447947739, "res": {"Yes": 0.6026896447947739, "No": 0.3972828831597295}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5181941499976297, "res": {"Yes": 0.5181941499976297, "No": 0.4817817347307295}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09192732130054551, "res": {"No": 0.9080562526882259, "Yes": 0.09192732130054551}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.15705299230176054, "res": {"No": 0.8429277166755338, "Yes": 0.15705299230176054}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2956760942493573, "res": {"No": 0.7042953919427077, "Yes": 0.2956760942493573}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38404625804143605, "res": {"No": 0.6159228665059526, "Yes": 0.38404625804143605}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26800680687674494, "res": {"No": 0.7319750648599542, "Yes": 0.26800680687674494}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3977447146393434, "res": {"No": 0.6022358698674047, "Yes": 0.3977447146393434}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2720477708450904, "res": {"No": 0.7279334716918396, "Yes": 0.2720477708450904}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3989438894205469, "res": {"No": 0.6010381914260182, "Yes": 0.3989438894205469}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.395706541208828, "res": {"No": 0.6042741459489962, "Yes": 0.395706541208828}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37456478512428626, "res": {"No": 0.625420259509722, "Yes": 0.37456478512428626}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45821141342887606, "res": {"No": 0.5417659269070639, "Yes": 0.45821141342887606}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36726853334524195, "res": {"No": 0.6327158827594473, "Yes": 0.36726853334524195}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30472583751960214, "res": {"No": 0.6952596542167991, "Yes": 0.30472583751960214}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4783030746638046, "res": {"No": 0.5216726996075275, "Yes": 0.4783030746638046}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3464836664970109, "res": {"No": 0.653500840347862, "Yes": 0.3464836664970109}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4853194718743183, "res": {"No": 0.514664678546897, "Yes": 0.4853194718743183}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4623101466661817, "res": {"No": 0.5376718226593504, "Yes": 0.4623101466661817}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4360565466025945, "res": {"No": 0.5639278845119128, "Yes": 0.4360565466025945}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4572496761350435, "res": {"No": 0.5427323447510949, "Yes": 0.4572496761350435}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3944571117771281, "res": {"No": 0.6055213054911587, "Yes": 0.3944571117771281}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1832320493822193, "res": {"No": 0.8167543438972086, "Yes": 0.1832320493822193}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4137870332300999, "res": {"No": 0.5861949298054, "Yes": 0.4137870332300999}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3341159802580121, "res": {"No": 0.6658657935060803, "Yes": 0.3341159802580121}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4745057229468193, "res": {"No": 0.525472442801101, "Yes": 0.4745057229468193}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4521142163640546, "res": {"No": 0.5478675052544867, "Yes": 0.4521142163640546}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29839263278966155, "res": {"No": 0.701582935839112, "Yes": 0.29839263278966155}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4336483182442388, "res": {"No": 0.5663245472938725, "Yes": 0.4336483182442388}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4327029128385909, "res": {"No": 0.5672756319530996, "Yes": 0.4327029128385909}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45360345942470637, "res": {"No": 0.5463680751771254, "Yes": 0.45360345942470637}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3729563836835041, "res": {"No": 0.6270205774426156, "Yes": 0.3729563836835041}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22157425691794794, "res": {"No": 0.778409191487357, "Yes": 0.22157425691794794}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36651878641218505, "res": {"No": 0.6334555927129785, "Yes": 0.36651878641218505}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34402040723869526, "res": {"No": 0.6559601754738444, "Yes": 0.34402040723869526}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4127149409281259, "res": {"No": 0.5872578841443932, "Yes": 0.4127149409281259}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33894948313728884, "res": {"No": 0.6610285155802922, "Yes": 0.33894948313728884}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21000223614748587, "res": {"No": 0.7899790443595861, "Yes": 0.21000223614748587}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.440587066206721, "res": {"No": 0.5593943770593273, "Yes": 0.440587066206721}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37933373275787646, "res": {"No": 0.6206468427740053, "Yes": 0.37933373275787646}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30895325248056726, "res": {"No": 0.6910309472761819, "Yes": 0.30895325248056726}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47899115849421564, "res": {"No": 0.5209912666693869, "Yes": 0.47899115849421564}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.436155869791082, "res": {"No": 0.5638111071397447, "Yes": 0.436155869791082}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34767073878831484, "res": {"No": 0.6523055707832583, "Yes": 0.34767073878831484}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29058975009280363, "res": {"No": 0.7093881791127596, "Yes": 0.29058975009280363}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49382646941208974, "res": {"No": 0.5061508457456596, "Yes": 0.49382646941208974}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31582033096173373, "res": {"No": 0.6841570790436189, "Yes": 0.31582033096173373}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16909660502650298, "res": {"No": 0.8308728701546972, "Yes": 0.16909660502650298}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30313946464875346, "res": {"No": 0.6968308713235184, "Yes": 0.30313946464875346}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2717876272022764, "res": {"No": 0.7281937398668619, "Yes": 0.2717876272022764}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.344320420569498, "res": {"No": 0.6556562542782638, "Yes": 0.344320420569498}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2489583414320405, "res": {"No": 0.7510259303883693, "Yes": 0.2489583414320405}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4151893905841173, "res": {"No": 0.5847954798331024, "Yes": 0.4151893905841173}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.10989768306564787, "res": {"No": 0.8900843993693717, "Yes": 0.10989768306564787}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19068891424008894, "res": {"No": 0.8093014263767101, "Yes": 0.19068891424008894}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33048202779634733, "res": {"No": 0.6695021784567283, "Yes": 0.33048202779634733}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2071172129165198, "res": {"No": 0.7928733104074359, "Yes": 0.2071172129165198}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5158362029693451, "res": {"Yes": 0.5158362029693451, "No": 0.48414627347528527}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3828144162910505, "res": {"No": 0.617163066927037, "Yes": 0.3828144162910505}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4672185516974236, "res": {"No": 0.5327596631089515, "Yes": 0.4672185516974236}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4529860448700491, "res": {"No": 0.5469915087443121, "Yes": 0.4529860448700491}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3241277016331016, "res": {"No": 0.6758507779391448, "Yes": 0.3241277016331016}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43286471339307775, "res": {"No": 0.5671111855692494, "Yes": 0.43286471339307775}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4591482666764862, "res": {"No": 0.540815929094818, "Yes": 0.4591482666764862}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5104351406937255, "res": {"Yes": 0.5104351406937255, "No": 0.4895389251247488}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43161225139804554, "res": {"No": 0.5683660577338038, "Yes": 0.43161225139804554}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40377153435653446, "res": {"No": 0.59620321814018, "Yes": 0.40377153435653446}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36385388746294994, "res": {"No": 0.6361302570052485, "Yes": 0.36385388746294994}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36764786040819225, "res": {"No": 0.6323325098558016, "Yes": 0.36764786040819225}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3143893163749999, "res": {"No": 0.6855926927074792, "Yes": 0.3143893163749999}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36096344000579894, "res": {"No": 0.639011658408692, "Yes": 0.36096344000579894}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34818403596981545, "res": {"No": 0.6518024003092006, "Yes": 0.34818403596981545}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40648843523718103, "res": {"No": 0.5934927385318103, "Yes": 0.40648843523718103}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28941964073515725, "res": {"No": 0.7105576675468186, "Yes": 0.28941964073515725}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5458787704679455, "res": {"Yes": 0.5458787704679455, "No": 0.4540910456350914}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44605438274037834, "res": {"No": 0.5539253215318405, "Yes": 0.44605438274037834}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3749600845038772, "res": {"No": 0.6250207998746848, "Yes": 0.3749600845038772}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3630912495994469, "res": {"No": 0.6368986571645235, "Yes": 0.3630912495994469}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45483718920010374, "res": {"No": 0.5451470795464589, "Yes": 0.45483718920010374}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4214579370481495, "res": {"No": 0.5785256468320407, "Yes": 0.4214579370481495}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31080884564574585, "res": {"No": 0.6891769840874254, "Yes": 0.31080884564574585}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4220443819409362, "res": {"No": 0.5779356434621478, "Yes": 0.4220443819409362}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22149505827870006, "res": {"No": 0.7784857673658464, "Yes": 0.22149505827870006}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.14872921134306594, "res": {"No": 0.8512539425891807, "Yes": 0.14872921134306594}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43219003138278567, "res": {"No": 0.5677781031830152, "Yes": 0.43219003138278567}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25709446333663344, "res": {"No": 0.7428881380799303, "Yes": 0.25709446333663344}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.05355468487075085, "res": {"No": 0.9464339104204244, "Yes": 0.05355468487075085}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17687869859832708, "res": {"No": 0.8231073741888454, "Yes": 0.17687869859832708}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35796480767401606, "res": {"No": 0.642011347637863, "Yes": 0.35796480767401606}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3245378528085151, "res": {"No": 0.6754435721555415, "Yes": 0.3245378528085151}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.16592151560331475, "res": {"No": 0.8340626996606524, "Yes": 0.16592151560331475}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1586120175426059, "res": {"No": 0.841372501667913, "Yes": 0.1586120175426059}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29642430151621796, "res": {"No": 0.7035583102042945, "Yes": 0.29642430151621796}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5120769614022824, "res": {"Yes": 0.5120769614022824, "No": 0.487898981344096}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4644353092834118, "res": {"No": 0.5355394084100125, "Yes": 0.4644353092834118}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4230860469235411, "res": {"No": 0.5768920317502902, "Yes": 0.4230860469235411}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4937386748709942, "res": {"No": 0.5062375264378741, "Yes": 0.4937386748709942}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41165396636470847, "res": {"No": 0.5883212127812624, "Yes": 0.41165396636470847}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3804185290927238, "res": {"No": 0.6195612578955428, "Yes": 0.3804185290927238}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4915015774201324, "res": {"No": 0.5084827079854782, "Yes": 0.4915015774201324}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4459674819705931, "res": {"No": 0.5540139012725149, "Yes": 0.4459674819705931}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4102336924812692, "res": {"No": 0.5897514511962799, "Yes": 0.4102336924812692}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33570358361533437, "res": {"No": 0.6642691667733367, "Yes": 0.33570358361533437}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3240858270392076, "res": {"No": 0.6758858487459164, "Yes": 0.3240858270392076}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48870294916973456, "res": {"No": 0.5112681351899859, "Yes": 0.48870294916973456}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4664908560807122, "res": {"No": 0.5334834394782507, "Yes": 0.4664908560807122}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46145372542573404, "res": {"No": 0.5385235773308282, "Yes": 0.46145372542573404}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33264088311793877, "res": {"No": 0.6673396515251468, "Yes": 0.33264088311793877}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35833499137720926, "res": {"No": 0.6416427463748983, "Yes": 0.35833499137720926}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2754070207756491, "res": {"No": 0.7245691548121691, "Yes": 0.2754070207756491}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3796112637518294, "res": {"No": 0.6203585882426997, "Yes": 0.3796112637518294}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30211319921542984, "res": {"No": 0.6978710607521245, "Yes": 0.30211319921542984}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1613859302690973, "res": {"No": 0.8385940914510702, "Yes": 0.1613859302690973}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33617214967576653, "res": {"No": 0.6638034515673528, "Yes": 0.33617214967576653}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32073424369927345, "res": {"No": 0.6792379031241745, "Yes": 0.32073424369927345}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2972297708673635, "res": {"No": 0.7027425292418449, "Yes": 0.2972297708673635}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.11666911822854932, "res": {"No": 0.8833079366902433, "Yes": 0.11666911822854932}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3387182967831378, "res": {"No": 0.6612671568891902, "Yes": 0.3387182967831378}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3503697337271535, "res": {"No": 0.6496117384655384, "Yes": 0.3503697337271535}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3359650642067219, "res": {"No": 0.6640110088830476, "Yes": 0.3359650642067219}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3454777283279303, "res": {"No": 0.6545019986053149, "Yes": 0.3454777283279303}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25682740386238206, "res": {"No": 0.7431589552992296, "Yes": 0.25682740386238206}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3600594220151069, "res": {"No": 0.6399231230997909, "Yes": 0.3600594220151069}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22162358483753167, "res": {"No": 0.7783611729054825, "Yes": 0.22162358483753167}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3045893509196745, "res": {"No": 0.6953911193872901, "Yes": 0.3045893509196745}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2754120332290404, "res": {"No": 0.7245740384247303, "Yes": 0.2754120332290404}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.382362946395907, "res": {"No": 0.617610919432251, "Yes": 0.382362946395907}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10396422805692707, "res": {"No": 0.8960212856352869, "Yes": 0.10396422805692707}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32689371222134184, "res": {"No": 0.6730863509933456, "Yes": 0.32689371222134184}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42417093771801523, "res": {"No": 0.5758055857500718, "Yes": 0.42417093771801523}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4460920759282347, "res": {"No": 0.5538770435048027, "Yes": 0.4460920759282347}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42304130370928134, "res": {"No": 0.5769377522554575, "Yes": 0.42304130370928134}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3183354748228325, "res": {"No": 0.6816468571576717, "Yes": 0.3183354748228325}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42017233098103834, "res": {"No": 0.5797985287562253, "Yes": 0.42017233098103834}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4275918756050239, "res": {"No": 0.5723830751262536, "Yes": 0.4275918756050239}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32442697716839236, "res": {"No": 0.6755541242967955, "Yes": 0.32442697716839236}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24867297478503622, "res": {"No": 0.751310585596329, "Yes": 0.24867297478503622}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.056430668530231445, "res": {"No": 0.9435595217951495, "Yes": 0.056430668530231445}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4860137030028564, "res": {"No": 0.5139695790531267, "Yes": 0.4860137030028564}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4451159806945176, "res": {"No": 0.5548599163875456, "Yes": 0.4451159806945176}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5098194375355997, "res": {"Yes": 0.5098194375355997, "No": 0.49015951501865185}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48444377589959525, "res": {"No": 0.5155342898935931, "Yes": 0.48444377589959525}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17054037615507536, "res": {"No": 0.829450123126763, "Yes": 0.17054037615507536}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3272587292457961, "res": {"No": 0.6727220272161802, "Yes": 0.3272587292457961}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2770724928484487, "res": {"No": 0.7229098669120042, "Yes": 0.2770724928484487}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2963043925030536, "res": {"No": 0.7036816125660121, "Yes": 0.2963043925030536}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29126373978135195, "res": {"No": 0.7087208879405077, "Yes": 0.29126373978135195}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24301036068177162, "res": {"No": 0.7569725710142692, "Yes": 0.24301036068177162}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4100325221961529, "res": {"No": 0.5899470216073097, "Yes": 0.4100325221961529}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41379588836736064, "res": {"No": 0.5861895720082269, "Yes": 0.41379588836736064}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4826594917374086, "res": {"No": 0.51732545280662, "Yes": 0.4826594917374086}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.04923911258826477, "res": {"No": 0.9507525500801298, "Yes": 0.04923911258826477}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17660960567867617, "res": {"No": 0.823375438053138, "Yes": 0.17660960567867617}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3782176974350451, "res": {"No": 0.6217620286775237, "Yes": 0.3782176974350451}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.339359249392764, "res": {"No": 0.6606219230521783, "Yes": 0.339359249392764}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3907694010943004, "res": {"No": 0.6092138521234045, "Yes": 0.3907694010943004}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3969310861050517, "res": {"No": 0.6030524525809247, "Yes": 0.3969310861050517}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2947806377788937, "res": {"No": 0.7052077984679985, "Yes": 0.2947806377788937}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39683527850530853, "res": {"No": 0.603142694070653, "Yes": 0.39683527850530853}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44116765752065296, "res": {"No": 0.5588168489677161, "Yes": 0.44116765752065296}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41380252984466664, "res": {"No": 0.5861777310984658, "Yes": 0.41380252984466664}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3712745622912135, "res": {"No": 0.6287096340168092, "Yes": 0.3712745622912135}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30974407096872175, "res": {"No": 0.6902422610790193, "Yes": 0.30974407096872175}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4273563373783061, "res": {"No": 0.5726321731895949, "Yes": 0.4273563373783061}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3712205569088137, "res": {"No": 0.6287680627179459, "Yes": 0.3712205569088137}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48053079650661734, "res": {"No": 0.5194547890734803, "Yes": 0.48053079650661734}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47312568829821994, "res": {"No": 0.5268563438056406, "Yes": 0.47312568829821994}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21046108759052698, "res": {"No": 0.789529089889554, "Yes": 0.21046108759052698}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36030899364272634, "res": {"No": 0.6396757446517648, "Yes": 0.36030899364272634}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4013838008745069, "res": {"No": 0.5986010496840545, "Yes": 0.4013838008745069}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42681479295423425, "res": {"No": 0.5731700413546492, "Yes": 0.42681479295423425}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3953671706072783, "res": {"No": 0.6046132630360695, "Yes": 0.3953671706072783}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42129643581588266, "res": {"No": 0.5786835480256991, "Yes": 0.42129643581588266}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31423693737407565, "res": {"No": 0.6857506988923602, "Yes": 0.31423693737407565}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4915054750430954, "res": {"No": 0.5084751011410662, "Yes": 0.4915054750430954}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4317435073730403, "res": {"No": 0.5682320755464733, "Yes": 0.4317435073730403}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6376146085436608, "res": {"Yes": 0.6376146085436608, "No": 0.3623648144125478}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3541249365519452, "res": {"No": 0.6458575825199215, "Yes": 0.3541249365519452}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3288380824403085, "res": {"No": 0.6711380805143904, "Yes": 0.3288380824403085}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2551441498531886, "res": {"No": 0.7448313217473419, "Yes": 0.2551441498531886}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3921564153611708, "res": {"No": 0.6078159844381447, "Yes": 0.3921564153611708}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32427148415019236, "res": {"No": 0.67570442478459, "Yes": 0.32427148415019236}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0035854467365082424, "res": {"No": 0.996407707775785, "Yes": 0.0035854467365082424}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3526997678318205, "res": {"No": 0.6472729974279056, "Yes": 0.3526997678318205}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.25231423600906977, "res": {"No": 0.747658661539747, "Yes": 0.25231423600906977}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17371796250488, "res": {"No": 0.8262650278198304, "Yes": 0.17371796250488}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.09326800089494292, "res": {"No": 0.9067167834716592, "Yes": 0.09326800089494292}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.011021118314075849, "res": {"No": 0.9889683271486457, "Yes": 0.011021118314075849}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23134867289440525, "res": {"No": 0.7686328205751093, "Yes": 0.23134867289440525}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29925357143361997, "res": {"No": 0.7007210798995562, "Yes": 0.29925357143361997}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.26310644364413777, "res": {"No": 0.7368664948097733, "Yes": 0.26310644364413777}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1967532841163585, "res": {"No": 0.8032224176607183, "Yes": 0.1967532841163585}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.004679640444978736, "res": {"No": 0.9953157789445417, "Yes": 0.004679640444978736}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27331234962596307, "res": {"No": 0.7266744506651253, "Yes": 0.27331234962596307}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43800532988172114, "res": {"No": 0.56197141127488, "Yes": 0.43800532988172114}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24959803877242656, "res": {"No": 0.7503894354104756, "Yes": 0.24959803877242656}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18927702430558085, "res": {"No": 0.8107114122948823, "Yes": 0.18927702430558085}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08762244433386936, "res": {"No": 0.9123644887800835, "Yes": 0.08762244433386936}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4648784990049858, "res": {"No": 0.5351014841901346, "Yes": 0.4648784990049858}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4117258885948562, "res": {"No": 0.588252324400739, "Yes": 0.4117258885948562}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5105434154692008, "res": {"Yes": 0.5105434154692008, "No": 0.48943177678185296}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45608864109157626, "res": {"No": 0.5438909781625223, "Yes": 0.45608864109157626}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33370383531115866, "res": {"No": 0.6662810636324047, "Yes": 0.33370383531115866}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36646846683732853, "res": {"No": 0.6335099328643372, "Yes": 0.36646846683732853}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5895681495522117, "res": {"Yes": 0.5895681495522117, "No": 0.4104153021008055}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.541798477181753, "res": {"Yes": 0.541798477181753, "No": 0.45818300520190763}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5711231033533952, "res": {"Yes": 0.5711231033533952, "No": 0.4288575570664596}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33560308900719343, "res": {"No": 0.6643731463205472, "Yes": 0.33560308900719343}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3301015647183074, "res": {"No": 0.669877814262738, "Yes": 0.3301015647183074}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33486852060561206, "res": {"No": 0.6651074316639101, "Yes": 0.33486852060561206}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32704977583180284, "res": {"No": 0.6729313435963219, "Yes": 0.32704977583180284}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4034504900660185, "res": {"No": 0.5965262092612249, "Yes": 0.4034504900660185}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.194601675363415, "res": {"No": 0.8053808563243945, "Yes": 0.194601675363415}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4577906479115935, "res": {"No": 0.5421921879939856, "Yes": 0.4577906479115935}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5295147806378798, "res": {"Yes": 0.5295147806378798, "No": 0.47046547595633204}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49584240232220744, "res": {"No": 0.5041390278708477, "Yes": 0.49584240232220744}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4451189184696848, "res": {"No": 0.5548604157616951, "Yes": 0.4451189184696848}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.409683282298779, "res": {"No": 0.5903052517663235, "Yes": 0.409683282298779}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36355099608694574, "res": {"No": 0.6364292830404648, "Yes": 0.36355099608694574}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37819127228769855, "res": {"No": 0.6217968669799764, "Yes": 0.37819127228769855}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36946406365807677, "res": {"No": 0.6305182744102288, "Yes": 0.36946406365807677}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37398063536762377, "res": {"No": 0.6260023461785528, "Yes": 0.37398063536762377}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2624746691717661, "res": {"No": 0.7375126561751141, "Yes": 0.2624746691717661}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4589060380538042, "res": {"No": 0.5410762864514632, "Yes": 0.4589060380538042}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4278253189516232, "res": {"No": 0.572155117457451, "Yes": 0.4278253189516232}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46846506399441706, "res": {"No": 0.5315213035848052, "Yes": 0.46846506399441706}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5042037634779635, "res": {"Yes": 0.5042037634779635, "No": 0.4957793401375807}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24782029807653633, "res": {"No": 0.7521625511101494, "Yes": 0.24782029807653633}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2572626581093513, "res": {"No": 0.742723703133957, "Yes": 0.2572626581093513}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39992968693029607, "res": {"No": 0.6000530326031704, "Yes": 0.39992968693029607}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4422585115998633, "res": {"No": 0.5577241130079292, "Yes": 0.4422585115998633}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.07067346463074818, "res": {"No": 0.9293187891665742, "Yes": 0.07067346463074818}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11101075394224813, "res": {"No": 0.8889786317800686, "Yes": 0.11101075394224813}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29825444932791373, "res": {"No": 0.7017250067347328, "Yes": 0.29825444932791373}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3073478840958929, "res": {"No": 0.6926336383306186, "Yes": 0.3073478840958929}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29194437249978106, "res": {"No": 0.7080314750438104, "Yes": 0.29194437249978106}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3706466856178215, "res": {"No": 0.6293330444346609, "Yes": 0.3706466856178215}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.14752043063715542, "res": {"No": 0.8524586801767542, "Yes": 0.14752043063715542}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.11945335721454993, "res": {"No": 0.8805306252434799, "Yes": 0.11945335721454993}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19422200859787603, "res": {"No": 0.8057560337095108, "Yes": 0.19422200859787603}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23553437739018993, "res": {"No": 0.764442514523531, "Yes": 0.23553437739018993}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25284242762407766, "res": {"No": 0.7471345945196112, "Yes": 0.25284242762407766}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28275909451327236, "res": {"No": 0.7172219016509378, "Yes": 0.28275909451327236}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46907389702242214, "res": {"No": 0.5309001430205035, "Yes": 0.46907389702242214}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43255709489716765, "res": {"No": 0.5674220249704954, "Yes": 0.43255709489716765}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5310562665409303, "res": {"Yes": 0.5310562665409303, "No": 0.46892375174006706}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4823524538970268, "res": {"No": 0.5176268309031176, "Yes": 0.4823524538970268}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48654771697367066, "res": {"No": 0.5134343684261605, "Yes": 0.48654771697367066}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4003110296112874, "res": {"No": 0.5996739608762904, "Yes": 0.4003110296112874}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3839361178497527, "res": {"No": 0.6160375497007661, "Yes": 0.3839361178497527}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5051338173285707, "res": {"Yes": 0.5051338173285707, "No": 0.49485370774651577}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35745135358324065, "res": {"No": 0.6425263830133685, "Yes": 0.35745135358324065}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30614598978522467, "res": {"No": 0.6938325985747802, "Yes": 0.30614598978522467}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40713143888983583, "res": {"No": 0.5928463617369903, "Yes": 0.40713143888983583}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.347909635858594, "res": {"No": 0.6520693876121229, "Yes": 0.347909635858594}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29222085788267566, "res": {"No": 0.7077645479437791, "Yes": 0.29222085788267566}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2502443095765966, "res": {"No": 0.7497423611008408, "Yes": 0.2502443095765966}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1949707951687844, "res": {"No": 0.8050024404123759, "Yes": 0.1949707951687844}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22045107297204325, "res": {"No": 0.7795183656138669, "Yes": 0.22045107297204325}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1472616415224901, "res": {"No": 0.8527065856271765, "Yes": 0.1472616415224901}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33060862665512725, "res": {"No": 0.6693538733724738, "Yes": 0.33060862665512725}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.14564900893911722, "res": {"No": 0.8543148639451323, "Yes": 0.14564900893911722}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.316515520716982, "res": {"No": 0.68347204734779, "Yes": 0.316515520716982}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2895364161702515, "res": {"No": 0.710447177315329, "Yes": 0.2895364161702515}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.59040195168607, "res": {"Yes": 0.59040195168607, "No": 0.4095746077403027}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36265079680556067, "res": {"No": 0.6373296585047201, "Yes": 0.36265079680556067}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34126434388626514, "res": {"No": 0.6587107259204138, "Yes": 0.34126434388626514}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40520805002936616, "res": {"No": 0.5947697994375547, "Yes": 0.40520805002936616}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2661720699035596, "res": {"No": 0.7338111003212865, "Yes": 0.2661720699035596}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39967302646215247, "res": {"No": 0.6003076531080046, "Yes": 0.39967302646215247}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34436794006630655, "res": {"No": 0.6556091995174745, "Yes": 0.34436794006630655}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.04926910834061056, "res": {"No": 0.9507150531389992, "Yes": 0.04926910834061056}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3972912063233165, "res": {"No": 0.6026907597716481, "Yes": 0.3972912063233165}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4326813085215091, "res": {"No": 0.5672905912087511, "Yes": 0.4326813085215091}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5087521887618461, "res": {"Yes": 0.5087521887618461, "No": 0.4912239722006319}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3867887231662661, "res": {"No": 0.6131900613254252, "Yes": 0.3867887231662661}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3868445910959584, "res": {"No": 0.6131301065324665, "Yes": 0.3868445910959584}, "ground_truth": 0}]