[{"key": "33773576", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.28089383287222797, "res": {"yes": 0.6798296207006462, "Yes": 0.28089383287222797}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.2682738478066732, "res": {"yes": 0.6652182078796589, "Yes": 0.2682738478066732}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3359492070299237, "res": {"yes": 0.5846728610780072, "Yes": 0.3359492070299237}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44627971047913845, "res": {"yes": 0.5076257071097696, "Yes": 0.44627971047913845}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6180290414444534, "res": {"Yes": 0.6180290414444534, "yes": 0.3750817037787813}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8937171954849896, "res": {"Yes": 0.8937171954849896, "yes": 0.10220035782454727}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755559435235354, "res": {"Yes": 0.9755559435235354, "yes": 0.020966796743092927}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7038782205222193, "res": {"Yes": 0.7038782205222193, "yes": 0.28865707128282886}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9807193185914677, "res": {"Yes": 0.9807193185914677, "yes": 0.015983233698671325}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9875612917875638, "res": {"Yes": 0.9875612917875638, "yes": 0.010059991089968018}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9743731361881449, "res": {"Yes": 0.9743731361881449, "yes": 0.022580587545128823}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9656215748338005, "res": {"Yes": 0.9656215748338005, "yes": 0.027033287553202813}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9780252708169629, "res": {"Yes": 0.9780252708169629, "yes": 0.018258090173549662}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9793931439438105, "res": {"Yes": 0.9793931439438105, "yes": 0.017362153984478226}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9760137202838242, "res": {"Yes": 0.9760137202838242, "yes": 0.021726414513669013}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7042852783334953, "res": {"Yes": 0.7042852783334953, "yes": 0.17121739741890035}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8668853314024316, "res": {"Yes": 0.8668853314024316, "yes": 0.12104628328920633}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5856985157715454, "res": {"Yes": 0.5856985157715454, "yes": 0.34994411803614617}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5751202670320283, "res": {"Yes": 0.5751202670320283, "yes": 0.3389654819304615}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.37040758446587607, "res": {"yes": 0.40505711383638715, "Yes": 0.37040758446587607}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9810879549255701, "res": {"Yes": 0.9810879549255701, "yes": 0.012774128045208907}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7240290811998511, "res": {"Yes": 0.7240290811998511, "yes": 0.2711993553355574}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9633716059240828, "res": {"Yes": 0.9633716059240828, "yes": 0.03140214739595813}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8220002005071613, "res": {"Yes": 0.8220002005071613, "yes": 0.17315388001182294}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7346013778145325, "res": {"Yes": 0.7346013778145325, "yes": 0.25809937340043915}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6172063130582471, "res": {"Yes": 0.6172063130582471, "yes": 0.3744920254912159}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9661525045743038, "res": {"Yes": 0.9661525045743038, "yes": 0.027299390583202483}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6939970980279482, "res": {"Yes": 0.6939970980279482, "yes": 0.2979119517681645}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6103152207249564, "res": {"Yes": 0.6103152207249564, "yes": 0.37994451503060805}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5772152683439123, "res": {"Yes": 0.5772152683439123, "yes": 0.4171807908251612}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8167211925112017, "res": {"Yes": 0.8167211925112017, "yes": 0.16405878012123518}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7867168179746116, "res": {"Yes": 0.7867168179746116, "yes": 0.2010499488606981}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9192029345025099, "res": {"Yes": 0.9192029345025099, "yes": 0.07417638061767806}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8703523058939132, "res": {"Yes": 0.8703523058939132, "yes": 0.11328116101683011}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8493143177866957, "res": {"Yes": 0.8493143177866957, "yes": 0.1292289036748425}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7375906006311312, "res": {"Yes": 0.7375906006311312, "yes": 0.24181422812438236}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7595825750092922, "res": {"Yes": 0.7595825750092922, "yes": 0.17984480400002423}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8664850701832637, "res": {"Yes": 0.8664850701832637, "yes": 0.1063340526664542}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7624811211154018, "res": {"Yes": 0.7624811211154018, "yes": 0.18580119889739372}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7889276584361284, "res": {"Yes": 0.7889276584361284, "yes": 0.19609645959938563}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9608386040057967, "res": {"Yes": 0.9608386040057967, "yes": 0.031185260988483437}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3005855119291301, "res": {"yes": 0.635168176673255, "Yes": 0.3005855119291301}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3064327991532484, "res": {"yes": 0.4626366297410251, "Yes": 0.3064327991532484}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34161100552761586, "res": {"yes": 0.5066997803457359, "Yes": 0.34161100552761586}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6448548669410308, "res": {"Yes": 0.6448548669410308, "yes": 0.2980352830466488}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8053902793355381, "res": {"Yes": 0.8053902793355381, "yes": 0.18433905994519648}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6747698427217014, "res": {"Yes": 0.6747698427217014, "yes": 0.28198102430687755}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6878150499054282, "res": {"Yes": 0.6878150499054282, "yes": 0.2762572450689904}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4231537291806896, "res": {"yes": 0.5022621924109291, "Yes": 0.4231537291806896}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3165733217281366, "res": {"yes": 0.6369066120784298, "Yes": 0.3165733217281366}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.729113307778414, "res": {"Yes": 0.729113307778414, "yes": 0.26703642677998873}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9751405319865245, "res": {"Yes": 0.9751405319865245, "yes": 0.017650291149791306}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7744557688905287, "res": {"Yes": 0.7744557688905287, "yes": 0.21881015440105034}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8394658519035175, "res": {"Yes": 0.8394658519035175, "yes": 0.15544336902953923}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7159018660530524, "res": {"Yes": 0.7159018660530524, "yes": 0.279073971912294}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8768817941637913, "res": {"Yes": 0.8768817941637913, "yes": 0.11701337903446538}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9411759584156074, "res": {"Yes": 0.9411759584156074, "yes": 0.055002260202671426}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9029704051703115, "res": {"Yes": 0.9029704051703115, "yes": 0.09214513413540243}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9114931864384729, "res": {"Yes": 0.9114931864384729, "yes": 0.08400717223784014}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8464827547773393, "res": {"Yes": 0.8464827547773393, "yes": 0.14779944877620357}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8192116693388082, "res": {"Yes": 0.8192116693388082, "yes": 0.16841036991535832}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231932728768779, "res": {"Yes": 0.9231932728768779, "yes": 0.07517932645382738}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8817306721336595, "res": {"Yes": 0.8817306721336595, "yes": 0.11610083479431078}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8064123554853356, "res": {"Yes": 0.8064123554853356, "yes": 0.19060804115706195}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8894858542839027, "res": {"Yes": 0.8894858542839027, "yes": 0.10828716991485063}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9793166543675156, "res": {"Yes": 0.9793166543675156, "yes": 0.0170674523590731}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9688521051540054, "res": {"Yes": 0.9688521051540054, "yes": 0.026455182609602185}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9866725781320875, "res": {"Yes": 0.9866725781320875, "yes": 0.007547474311246523}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894234838742264, "res": {"Yes": 0.9894234838742264, "yes": 0.008131446093178198}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7019886592509462, "res": {"Yes": 0.7019886592509462, "yes": 0.29418316365548386}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7897632910514688, "res": {"Yes": 0.7897632910514688, "yes": 0.19955779207823907}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7620711385316205, "res": {"Yes": 0.7620711385316205, "yes": 0.20461162682040598}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7204145851357566, "res": {"Yes": 0.7204145851357566, "yes": 0.2659062436002499}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8647853786534978, "res": {"Yes": 0.8647853786534978, "yes": 0.11716104940946812}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6577504532305123, "res": {"Yes": 0.6577504532305123, "yes": 0.31796511610570616}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9855814011178486, "res": {"Yes": 0.9855814011178486, "yes": 0.013009488540868674}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944909762344651, "res": {"Yes": 0.9944909762344651, "yes": 0.004674112377704147}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9121216405552725, "res": {"Yes": 0.9121216405552725, "yes": 0.08653267419589471}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8256419945967978, "res": {"Yes": 0.8256419945967978, "yes": 0.1719331397714118}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9141464596937041, "res": {"Yes": 0.9141464596937041, "yes": 0.08418664338068088}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7795823900863638, "res": {"Yes": 0.7795823900863638, "yes": 0.2034203400747253}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7504324639743193, "res": {"Yes": 0.7504324639743193, "yes": 0.20813930247666104}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9612925825516523, "res": {"Yes": 0.9612925825516523, "yes": 0.030331939886386636}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6863703967612615, "res": {"Yes": 0.6863703967612615, "yes": 0.28775185063558784}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7028433097463276, "res": {"Yes": 0.7028433097463276, "yes": 0.2690877619464682}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8099603849344469, "res": {"Yes": 0.8099603849344469, "yes": 0.18371886452655817}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3311350767473057, "res": {"yes": 0.6650267590253437, "Yes": 0.3311350767473057}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8444464289283204, "res": {"Yes": 0.8444464289283204, "yes": 0.14601882897601548}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40480893039490157, "res": {"yes": 0.5904884519079786, "Yes": 0.40480893039490157}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7334371281009244, "res": {"Yes": 0.7334371281009244, "yes": 0.26075101304212017}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9122759845939918, "res": {"Yes": 0.9122759845939918, "yes": 0.08107140252171477}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8545007489067651, "res": {"Yes": 0.8545007489067651, "yes": 0.14081236774750808}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7081773799605809, "res": {"Yes": 0.7081773799605809, "yes": 0.27950214835341836}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.848853842519633, "res": {"Yes": 0.848853842519633, "yes": 0.14440350210684785}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8696581548575778, "res": {"Yes": 0.8696581548575778, "yes": 0.12496810927874395}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5522623576608463, "res": {"Yes": 0.5522623576608463, "yes": 0.44249551065413595}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9746425798131028, "res": {"Yes": 0.9746425798131028, "yes": 0.017932241235240766}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9446037288348654, "res": {"Yes": 0.9446037288348654, "yes": 0.047408331295416745}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7709566751952217, "res": {"Yes": 0.7709566751952217, "yes": 0.2218720976171848}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7756808415987488, "res": {"Yes": 0.7756808415987488, "yes": 0.21856601105085238}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.991742891744323, "res": {"Yes": 0.991742891744323, "yes": 0.006734205108933688}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949116979903719, "res": {"Yes": 0.9949116979903719, "yes": 0.003978161106495587}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9901905803264817, "res": {"Yes": 0.9901905803264817, "yes": 0.008684724509994954}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9836773756160472, "res": {"Yes": 0.9836773756160472, "yes": 0.014835660546631175}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877834049765297, "res": {"Yes": 0.9877834049765297, "yes": 0.01007004302166618}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.677915859347713, "res": {"Yes": 0.677915859347713, "yes": 0.31646627473339534}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8544096212613623, "res": {"Yes": 0.8544096212613623, "yes": 0.13886375588844613}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7728512353119145, "res": {"Yes": 0.7728512353119145, "yes": 0.21607668741047154}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8214811480275179, "res": {"Yes": 0.8214811480275179, "yes": 0.172685475475165}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8646469548898166, "res": {"Yes": 0.8646469548898166, "yes": 0.1291590743254879}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7383919040742992, "res": {"Yes": 0.7383919040742992, "yes": 0.23628775874578875}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8243800332456315, "res": {"Yes": 0.8243800332456315, "yes": 0.1632204725129605}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7942338100399823, "res": {"Yes": 0.7942338100399823, "yes": 0.18892414380807626}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8245827732216651, "res": {"Yes": 0.8245827732216651, "yes": 0.15770835595794117}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7178945550360935, "res": {"Yes": 0.7178945550360935, "yes": 0.2576417564160471}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8505962200803096, "res": {"Yes": 0.8505962200803096, "yes": 0.14370987050239983}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9817248523285409, "res": {"Yes": 0.9817248523285409, "yes": 0.011488150110957699}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9615141668004668, "res": {"Yes": 0.9615141668004668, "yes": 0.02550795282374021}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9648046146583276, "res": {"Yes": 0.9648046146583276, "yes": 0.025377853366527783}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9657040578888727, "res": {"Yes": 0.9657040578888727, "yes": 0.02336005655514497}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8688355866051877, "res": {"Yes": 0.8688355866051877, "yes": 0.12482406663646839}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7750925789914653, "res": {"Yes": 0.7750925789914653, "yes": 0.21843650535890557}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.823948097110599, "res": {"Yes": 0.823948097110599, "yes": 0.1723347625240328}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.786582324497745, "res": {"Yes": 0.786582324497745, "yes": 0.2068181653545156}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8536490142609463, "res": {"Yes": 0.8536490142609463, "yes": 0.1424168298270618}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7458273499103398, "res": {"Yes": 0.7458273499103398, "yes": 0.24544281295566794}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6612813412218319, "res": {"Yes": 0.6612813412218319, "yes": 0.32700853745520475}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8118940155994977, "res": {"Yes": 0.8118940155994977, "yes": 0.16335405781714146}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7968914880206146, "res": {"Yes": 0.7968914880206146, "yes": 0.1942743002007245}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9020109403517482, "res": {"Yes": 0.9020109403517482, "yes": 0.09195414041154605}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8123565615451004, "res": {"Yes": 0.8123565615451004, "yes": 0.1325411255771447}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924601573048751, "res": {"Yes": 0.9924601573048751, "yes": 0.006903360202834769}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8172535005980182, "res": {"Yes": 0.8172535005980182, "yes": 0.15968674924441542}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.71011763469615, "res": {"Yes": 0.71011763469615, "yes": 0.22097621669946144}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8092968214646951, "res": {"Yes": 0.8092968214646951, "yes": 0.13539005097097576}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9411852393974932, "res": {"Yes": 0.9411852393974932, "yes": 0.05487715155850418}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9016567811808383, "res": {"Yes": 0.9016567811808383, "yes": 0.09095153470488589}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8935089210904373, "res": {"Yes": 0.8935089210904373, "yes": 0.1005892888147063}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.757684391574719, "res": {"Yes": 0.757684391574719, "yes": 0.23269837312410147}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9325437935261531, "res": {"Yes": 0.9325437935261531, "yes": 0.06009954549192124}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4824665196196393, "res": {"yes": 0.4949943156572945, "Yes": 0.4824665196196393}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.571569118958958, "res": {"Yes": 0.571569118958958, "yes": 0.42140884005843854}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5807817721519144, "res": {"Yes": 0.5807817721519144, "yes": 0.4134896231485613}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6857424905056217, "res": {"Yes": 0.6857424905056217, "yes": 0.3070295137568231}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7265567607357273, "res": {"Yes": 0.7265567607357273, "yes": 0.26654811702104897}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8445305990984425, "res": {"Yes": 0.8445305990984425, "yes": 0.13964695379065056}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8049138467191845, "res": {"Yes": 0.8049138467191845, "yes": 0.17718932239345211}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8015414041008175, "res": {"Yes": 0.8015414041008175, "yes": 0.12780288026600242}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7777965055550591, "res": {"Yes": 0.7777965055550591, "yes": 0.19037598381614873}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7617255490677817, "res": {"Yes": 0.7617255490677817, "yes": 0.2212228089605595}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9560950492225512, "res": {"Yes": 0.9560950492225512, "yes": 0.03071938163420955}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.80628671010139, "res": {"Yes": 0.80628671010139, "yes": 0.18810044065771234}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8413755474418819, "res": {"Yes": 0.8413755474418819, "yes": 0.15358443469877223}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855443597225553, "res": {"Yes": 0.9855443597225553, "yes": 0.009323553930078197}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.760735508006532, "res": {"Yes": 0.760735508006532, "yes": 0.23423263284837642}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6452321335222568, "res": {"Yes": 0.6452321335222568, "yes": 0.3444025163431893}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6766839865874094, "res": {"Yes": 0.6766839865874094, "yes": 0.314199985283066}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6312671565934515, "res": {"Yes": 0.6312671565934515, "yes": 0.36465634115523327}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7960420881303082, "res": {"Yes": 0.7960420881303082, "yes": 0.1964113882034902}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6134286263909912, "res": {"Yes": 0.6134286263909912, "yes": 0.37463825946663376}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7095756384216034, "res": {"Yes": 0.7095756384216034, "yes": 0.2860434089883812}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8159741038496345, "res": {"Yes": 0.8159741038496345, "yes": 0.17916811929289886}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9769796052786204, "res": {"Yes": 0.9769796052786204, "yes": 0.014503968171436055}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9795696102466748, "res": {"Yes": 0.9795696102466748, "yes": 0.013125111175841758}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854772020904164, "res": {"Yes": 0.9854772020904164, "yes": 0.00976358529707871}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7975072846559722, "res": {"Yes": 0.7975072846559722, "yes": 0.19333019442003938}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8208275010081728, "res": {"Yes": 0.8208275010081728, "yes": 0.17344425986547207}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8890280994833331, "res": {"Yes": 0.8890280994833331, "yes": 0.10447630287909015}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9455250134233295, "res": {"Yes": 0.9455250134233295, "yes": 0.04954337650984413}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8650910132112323, "res": {"Yes": 0.8650910132112323, "yes": 0.12866034905496124}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6903295891512892, "res": {"Yes": 0.6903295891512892, "yes": 0.3011228373421995}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5879881995955151, "res": {"Yes": 0.5879881995955151, "yes": 0.4063009653652565}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6542103586049001, "res": {"Yes": 0.6542103586049001, "yes": 0.34000406689077894}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7352767500197916, "res": {"Yes": 0.7352767500197916, "yes": 0.258755830842359}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6009862038709541, "res": {"Yes": 0.6009862038709541, "yes": 0.39292453832115415}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6931323477721674, "res": {"Yes": 0.6931323477721674, "yes": 0.2759770414160745}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9857525750035661, "res": {"Yes": 0.9857525750035661, "yes": 0.010753570930354745}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7082891250029257, "res": {"Yes": 0.7082891250029257, "yes": 0.2852020222012265}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9108321616376799, "res": {"Yes": 0.9108321616376799, "yes": 0.08620612915102939}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.753586175877377, "res": {"Yes": 0.753586175877377, "yes": 0.20284103206376647}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7614540118503332, "res": {"Yes": 0.7614540118503332, "yes": 0.20559462268479278}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7963830820889212, "res": {"Yes": 0.7963830820889212, "yes": 0.18458374796166124}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7289378823503202, "res": {"Yes": 0.7289378823503202, "yes": 0.25238984067919223}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8290300798547352, "res": {"Yes": 0.8290300798547352, "yes": 0.14933852055188818}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6771370427779738, "res": {"Yes": 0.6771370427779738, "yes": 0.31043513637343934}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8488114178647849, "res": {"Yes": 0.8488114178647849, "yes": 0.14730772689980376}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9323608603669779, "res": {"Yes": 0.9323608603669779, "yes": 0.06486683518389134}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9141600037879853, "res": {"Yes": 0.9141600037879853, "yes": 0.0833458300859193}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.927447128906243, "res": {"Yes": 0.927447128906243, "yes": 0.070165622210475}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9201158987235933, "res": {"Yes": 0.9201158987235933, "yes": 0.07761276250851395}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.3660430974197777, "res": {"yes": 0.414846046445173, "Yes": 0.3660430974197777}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8007616754147817, "res": {"Yes": 0.8007616754147817, "yes": 0.19299808739438643}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3971567499802357, "res": {"Yes": 0.3971567499802357, "yes": 0.2032091164225458}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5853785599562615, "res": {"Yes": 0.5853785599562615, "yes": 0.3015700052146808}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.32567087287666024, "res": {"Yes": 0.32567087287666024, "yes": 0.19077766243755181}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.660072870637229, "res": {"Yes": 0.660072870637229, "yes": 0.3279911344759573}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8116002280843675, "res": {"Yes": 0.8116002280843675, "yes": 0.17966093480594417}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8246387725392299, "res": {"Yes": 0.8246387725392299, "yes": 0.16980956087597088}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.77927012854827, "res": {"Yes": 0.77927012854827, "yes": 0.21526303069829947}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6856197261603137, "res": {"Yes": 0.6856197261603137, "yes": 0.3087287250916089}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7994886045743598, "res": {"Yes": 0.7994886045743598, "yes": 0.19653111778135848}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7135852270032963, "res": {"Yes": 0.7135852270032963, "yes": 0.27888303903060213}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5830621468163864, "res": {"Yes": 0.5830621468163864, "yes": 0.41083252059627634}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6742432959401586, "res": {"Yes": 0.6742432959401586, "yes": 0.31705684481436985}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5960934076212577, "res": {"Yes": 0.5960934076212577, "yes": 0.3914737952573618}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9379356506947739, "res": {"Yes": 0.9379356506947739, "yes": 0.033879475175702826}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6088849532729523, "res": {"Yes": 0.6088849532729523, "yes": 0.3642137712773549}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7269004597582511, "res": {"Yes": 0.7269004597582511, "yes": 0.25270249279367984}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8109426033980633, "res": {"Yes": 0.8109426033980633, "yes": 0.16375343951755703}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9285139073529893, "res": {"Yes": 0.9285139073529893, "yes": 0.05953995971492256}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8942021238904964, "res": {"Yes": 0.8942021238904964, "yes": 0.08585186224737586}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8653877470537403, "res": {"Yes": 0.8653877470537403, "yes": 0.1286070818341568}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8939980902469487, "res": {"Yes": 0.8939980902469487, "yes": 0.094416946871402}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9283065562126849, "res": {"Yes": 0.9283065562126849, "yes": 0.06278064836808775}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6879747515848679, "res": {"Yes": 0.6879747515848679, "yes": 0.30692550977683136}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9560175771113687, "res": {"Yes": 0.9560175771113687, "yes": 0.03253755884937525}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6434473458015472, "res": {"Yes": 0.6434473458015472, "yes": 0.3491486556668839}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9474848515153111, "res": {"Yes": 0.9474848515153111, "yes": 0.039878577660838174}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9722326629403493, "res": {"Yes": 0.9722326629403493, "yes": 0.018682360295773345}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9293033495924679, "res": {"Yes": 0.9293033495924679, "yes": 0.05330021041870638}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9740137007111315, "res": {"Yes": 0.9740137007111315, "yes": 0.02006555731839705}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7623193245407204, "res": {"Yes": 0.7623193245407204, "yes": 0.22927232266533254}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9667668133737353, "res": {"Yes": 0.9667668133737353, "yes": 0.0291026920143267}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9693700126282165, "res": {"Yes": 0.9693700126282165, "yes": 0.026480019804752373}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9800167408141546, "res": {"Yes": 0.9800167408141546, "yes": 0.016180394268555612}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8911631049136226, "res": {"Yes": 0.8911631049136226, "yes": 0.10425805031892334}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8867894305066026, "res": {"Yes": 0.8867894305066026, "yes": 0.10655369952684099}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8048447236862137, "res": {"Yes": 0.8048447236862137, "yes": 0.18835012145642296}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9387693815422593, "res": {"Yes": 0.9387693815422593, "yes": 0.047073804519142226}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7847137400985755, "res": {"Yes": 0.7847137400985755, "yes": 0.20844763924527202}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.3123630643252491, "res": {"yes": 0.6669390810474819, "Yes": 0.3123630643252491}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7491000247414998, "res": {"Yes": 0.7491000247414998, "yes": 0.24456966065307928}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42626315017919797, "res": {"yes": 0.555090530788637, "Yes": 0.42626315017919797}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4019956143498956, "res": {"yes": 0.4952242741162637, "Yes": 0.4019956143498956}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.23031650668893902, "res": {"yes": 0.6926255691957348, "Yes": 0.23031650668893902}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5401821724104586, "res": {"Yes": 0.5401821724104586, "yes": 0.3058774950996303}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867287529743637, "res": {"Yes": 0.9867287529743637, "yes": 0.009852579302058274}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4988532074275557, "res": {"Yes": 0.4988532074275557, "yes": 0.4378330962574411}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7703908465454081, "res": {"Yes": 0.7703908465454081, "yes": 0.1908957903674231}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7425780024636137, "res": {"Yes": 0.7425780024636137, "yes": 0.20486632207177108}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9857348533030734, "res": {"Yes": 0.9857348533030734, " Yes": 0.0073877134741305705}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862528855962212, "res": {"Yes": 0.9862528855962212, "yes": 0.008751015746812472}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8818235497166674, "res": {"Yes": 0.8818235497166674, "yes": 0.11395824153697}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8870855789366777, "res": {"Yes": 0.8870855789366777, "yes": 0.11010731476438708}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9151808252568355, "res": {"Yes": 0.9151808252568355, "yes": 0.06704996441352029}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9859904411505234, "res": {"Yes": 0.9859904411505234, "yes": 0.009939918077975311}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8665415495207799, "res": {"Yes": 0.8665415495207799, "yes": 0.12700916878144503}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8073623361233119, "res": {"Yes": 0.8073623361233119, "yes": 0.18316675711559252}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8836725089255117, "res": {"Yes": 0.8836725089255117, "yes": 0.11035940732789502}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8849299585389618, "res": {"Yes": 0.8849299585389618, "yes": 0.10973005222923522}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8793194789944553, "res": {"Yes": 0.8793194789944553, "yes": 0.11649906869997093}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.818168830026669, "res": {"Yes": 0.818168830026669, "yes": 0.17978972597218965}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8481339464030011, "res": {"Yes": 0.8481339464030011, "yes": 0.15007705751897696}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7533269114832547, "res": {"Yes": 0.7533269114832547, "yes": 0.2437425825820039}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6833615183517406, "res": {"Yes": 0.6833615183517406, "yes": 0.31284184615209887}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9775200459178987, "res": {"Yes": 0.9775200459178987, "yes": 0.019411051697252813}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.978994227296162, "res": {"Yes": 0.978994227296162, "yes": 0.01590821399007576}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7660171656713581, "res": {"Yes": 0.7660171656713581, "yes": 0.22740331953340917}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9807678173416373, "res": {"Yes": 0.9807678173416373, "yes": 0.014147250283939986}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9094987912970949, "res": {"Yes": 0.9094987912970949, "yes": 0.08830373264105865}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9710796269065044, "res": {"Yes": 0.9710796269065044, "yes": 0.022341136807442947}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6888965392389325, "res": {"Yes": 0.6888965392389325, "yes": 0.30310305978520496}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7158113817759101, "res": {"Yes": 0.7158113817759101, "yes": 0.2766009293088203}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8161894841554576, "res": {"Yes": 0.8161894841554576, "yes": 0.17645443404320937}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9426410082266513, "res": {"Yes": 0.9426410082266513, "yes": 0.05175593331390842}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9004911160405198, "res": {"Yes": 0.9004911160405198, "yes": 0.09304336538946559}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8439803471707509, "res": {"Yes": 0.8439803471707509, "yes": 0.15304568613085553}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8929914737710386, "res": {"Yes": 0.8929914737710386, "yes": 0.10358481844510178}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8269557682265364, "res": {"Yes": 0.8269557682265364, "yes": 0.17010681767323643}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.87378876515797, "res": {"Yes": 0.87378876515797, "yes": 0.12278050867648026}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9537901855610557, "res": {"Yes": 0.9537901855610557, "yes": 0.035284129689601235}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5728282187337584, "res": {"Yes": 0.5728282187337584, "yes": 0.4127131208592496}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9568633449222065, "res": {"Yes": 0.9568633449222065, "yes": 0.035674847155043696}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.940367051139405, "res": {"Yes": 0.940367051139405, "yes": 0.05273536425527446}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9752879190377883, "res": {"Yes": 0.9752879190377883, "yes": 0.020299236590923107}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6446890960654994, "res": {"Yes": 0.6446890960654994, "yes": 0.33745949185714574}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4987602797314956, "res": {"Yes": 0.4987602797314956, "yes": 0.4956336470430983}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5300690276857881, "res": {"Yes": 0.5300690276857881, "yes": 0.46408906256119997}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7249781953203092, "res": {"Yes": 0.7249781953203092, "yes": 0.26563364339310463}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8484070555711397, "res": {"Yes": 0.8484070555711397, "yes": 0.14731761157990603}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8192645511588065, "res": {"Yes": 0.8192645511588065, "yes": 0.1465890918308833}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7345856795508252, "res": {"Yes": 0.7345856795508252, "yes": 0.2442691226069967}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923068312766384, "res": {"Yes": 0.9923068312766384, "yes": 0.006947213048801166}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8585408558235239, "res": {"Yes": 0.8585408558235239, "yes": 0.085284427730129}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6451923884469928, "res": {"Yes": 0.6451923884469928, "yes": 0.32690492486796674}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9008683612401355, "res": {"Yes": 0.9008683612401355, "yes": 0.0948954234028154}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8727478643807374, "res": {"Yes": 0.8727478643807374, "yes": 0.1256508060463975}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9842076812683395, "res": {"Yes": 0.9842076812683395, "yes": 0.01102224252547769}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735816412985595, "res": {"Yes": 0.9735816412985595, "yes": 0.02370670166827243}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9842218874245365, "res": {"Yes": 0.9842218874245365, "yes": 0.011288065069091713}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7965809919444607, "res": {"Yes": 0.7965809919444607, "yes": 0.19048148133129633}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8750431233590648, "res": {"Yes": 0.8750431233590648, "yes": 0.10435789390220422}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.912808863727438, "res": {"Yes": 0.912808863727438, "yes": 0.07207877938469767}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8593754538594331, "res": {"Yes": 0.8593754538594331, "yes": 0.12054281234277651}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6768014014445266, "res": {"Yes": 0.6768014014445266, "yes": 0.29893775635145836}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9210227920174233, "res": {"Yes": 0.9210227920174233, "yes": 0.06252612559884047}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.476905033075577, "res": {"Yes": 0.476905033075577, "yes": 0.392460140414741}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5418407174375378, "res": {"Yes": 0.5418407174375378, "yes": 0.39499380411059526}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.574208079794535, "res": {"Yes": 0.574208079794535, "yes": 0.380387161074136}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7225612409696096, "res": {"Yes": 0.7225612409696096, "yes": 0.24055084453324452}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5698084655349497, "res": {"Yes": 0.5698084655349497, "yes": 0.42302470389421426}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7855700078900656, "res": {"Yes": 0.7855700078900656, "yes": 0.21078182187981795}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8330847260856773, "res": {"Yes": 0.8330847260856773, "yes": 0.16432578955599694}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6575313075619658, "res": {"Yes": 0.6575313075619658, "yes": 0.33998635314034115}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7944181651576165, "res": {"Yes": 0.7944181651576165, "yes": 0.2022084263339954}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9491623311954248, "res": {"Yes": 0.9491623311954248, "yes": 0.04319375734831875}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9044159705592429, "res": {"Yes": 0.9044159705592429, "yes": 0.08438727153078572}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8861988481328306, "res": {"Yes": 0.8861988481328306, "yes": 0.10059350359420663}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.837460592601601, "res": {"Yes": 0.837460592601601, "yes": 0.14214220199071886}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8917338681593566, "res": {"Yes": 0.8917338681593566, "yes": 0.08423845936190484}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8580730184728754, "res": {"Yes": 0.8580730184728754, "yes": 0.13721117727358112}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9650031764408686, "res": {"Yes": 0.9650031764408686, "yes": 0.02763915920627345}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9750593773426445, "res": {"Yes": 0.9750593773426445, "yes": 0.01820212378907349}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9721291237309451, "res": {"Yes": 0.9721291237309451, "yes": 0.017004968719722013}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.975235769836455, "res": {"Yes": 0.975235769836455, "yes": 0.0173225007702058}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8973878900637934, "res": {"Yes": 0.8973878900637934, "yes": 0.07667830238529585}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879308031347525, "res": {"Yes": 0.9879308031347525, "yes": 0.01024224955901605}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.902325869295306, "res": {"Yes": 0.902325869295306, "yes": 0.0961758684972343}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6055166006088936, "res": {"Yes": 0.6055166006088936, "yes": 0.31386478714584615}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8339054186074064, "res": {"Yes": 0.8339054186074064, "yes": 0.15020772643446667}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8873840713460847, "res": {"Yes": 0.8873840713460847, "yes": 0.10533874276690161}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9458433647630026, "res": {"Yes": 0.9458433647630026, "yes": 0.05127702924949338}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9386587634760828, "res": {"Yes": 0.9386587634760828, "yes": 0.05671985234432098}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8855728812068995, "res": {"Yes": 0.8855728812068995, "yes": 0.10769530382448138}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9082562286916177, "res": {"Yes": 0.9082562286916177, "yes": 0.08764264363516834}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8403165356263985, "res": {"Yes": 0.8403165356263985, "yes": 0.15084633640741887}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.789937034391062, "res": {"Yes": 0.789937034391062, "yes": 0.19958056292136708}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8487165175656535, "res": {"Yes": 0.8487165175656535, "yes": 0.14434159515894265}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7885977742100891, "res": {"Yes": 0.7885977742100891, "yes": 0.20341346458342724}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8630398955987237, "res": {"Yes": 0.8630398955987237, "yes": 0.12990994491485844}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8713334757667889, "res": {"Yes": 0.8713334757667889, "yes": 0.11789583483621097}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8179326015231162, "res": {"Yes": 0.8179326015231162, "yes": 0.17404430251858696}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.814811125868065, "res": {"Yes": 0.814811125868065, "yes": 0.17556895627474545}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8655702592652502, "res": {"Yes": 0.8655702592652502, "yes": 0.12288872596734138}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8925774815810982, "res": {"Yes": 0.8925774815810982, "yes": 0.10071908236904184}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8759368146869406, "res": {"Yes": 0.8759368146869406, "yes": 0.11425214211707518}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8308857986371402, "res": {"Yes": 0.8308857986371402, "yes": 0.15458365080587236}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8925656148423649, "res": {"Yes": 0.8925656148423649, "yes": 0.07854378806237482}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.682517439410267, "res": {"Yes": 0.682517439410267, "yes": 0.28535906913765136}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894890808678984, "res": {"Yes": 0.9894890808678984, "yes": 0.005382574971888512}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5917183011495268, "res": {"Yes": 0.5917183011495268, "yes": 0.32635308529514623}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7583167383493544, "res": {"Yes": 0.7583167383493544, "yes": 0.19694869807917365}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.609727221315541, "res": {"Yes": 0.609727221315541, "yes": 0.3629632461931649}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46927465669242624, "res": {"yes": 0.4895322184874151, "Yes": 0.46927465669242624}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5699210993088287, "res": {"Yes": 0.5699210993088287, "yes": 0.37999848998244834}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.906660925998092, "res": {"Yes": 0.906660925998092, "yes": 0.08753937384782459}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7879386795359523, "res": {"Yes": 0.7879386795359523, "yes": 0.1964539355178829}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8586530658601766, "res": {"Yes": 0.8586530658601766, "yes": 0.13342786289920114}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9204873600652227, "res": {"Yes": 0.9204873600652227, "yes": 0.07188865609992066}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8676928815769219, "res": {"Yes": 0.8676928815769219, "yes": 0.12323731404473225}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7849725892203655, "res": {"Yes": 0.7849725892203655, "yes": 0.20466321591405948}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6936441653566291, "res": {"Yes": 0.6936441653566291, "yes": 0.29278146543750416}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7514447215346873, "res": {"Yes": 0.7514447215346873, "yes": 0.23715761895945947}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7962777833159583, "res": {"Yes": 0.7962777833159583, "yes": 0.18823912178019342}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6545895113726266, "res": {"Yes": 0.6545895113726266, "yes": 0.33229657853106825}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6331132265037946, "res": {"Yes": 0.6331132265037946, "yes": 0.3488180288731473}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7941459885210094, "res": {"Yes": 0.7941459885210094, "yes": 0.18802025172731082}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6911025763459795, "res": {"Yes": 0.6911025763459795, "yes": 0.30511213933437126}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7430671956967837, "res": {"Yes": 0.7430671956967837, "yes": 0.2478099146235818}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5004004505550491, "res": {"Yes": 0.5004004505550491, "yes": 0.49447912656668724}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7841407787652148, "res": {"Yes": 0.7841407787652148, "yes": 0.21216056688901844}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7183724225859197, "res": {"Yes": 0.7183724225859197, "yes": 0.2743988977731974}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9105553417028911, "res": {"Yes": 0.9105553417028911, "yes": 0.08510054833534558}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7630831087292534, "res": {"Yes": 0.7630831087292534, "yes": 0.2310647973007142}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8322683617406504, "res": {"Yes": 0.8322683617406504, "yes": 0.1613989062195172}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9774169640443959, "res": {"Yes": 0.9774169640443959, "yes": 0.015597375750487533}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6783249965142898, "res": {"Yes": 0.6783249965142898, "yes": 0.31511576491633575}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7905412907372749, "res": {"Yes": 0.7905412907372749, "yes": 0.20477204380352954}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9604352018358074, "res": {"Yes": 0.9604352018358074, "yes": 0.03245661514692872}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6957547857858069, "res": {"Yes": 0.6957547857858069, "yes": 0.30028096278250177}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9786704256501882, "res": {"Yes": 0.9786704256501882, "yes": 0.01705859664955415}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8491595698312231, "res": {"Yes": 0.8491595698312231, "yes": 0.1445767891177105}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8532409224881944, "res": {"Yes": 0.8532409224881944, "yes": 0.14103275649749283}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8125322688980955, "res": {"Yes": 0.8125322688980955, "yes": 0.17784805799312042}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.696345983104798, "res": {"Yes": 0.696345983104798, "yes": 0.2877949590915558}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7884480494800379, "res": {"Yes": 0.7884480494800379, "yes": 0.17500651100637946}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7985346407410405, "res": {"Yes": 0.7985346407410405, "yes": 0.15319602017240386}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8441332142938914, "res": {"Yes": 0.8441332142938914, "yes": 0.1511684803895831}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6524639110829529, "res": {"Yes": 0.6524639110829529, "yes": 0.3011631302733729}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7587530324327052, "res": {"Yes": 0.7587530324327052, "yes": 0.17011598667781147}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.821818361298596, "res": {"Yes": 0.821818361298596, "yes": 0.16425225379112357}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7671675522789698, "res": {"Yes": 0.7671675522789698, "yes": 0.2216248259330818}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8832905401117437, "res": {"Yes": 0.8832905401117437, "yes": 0.10374679623611702}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9853461689425513, "res": {"Yes": 0.9853461689425513, "yes": 0.007086889862880341}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8963686597643059, "res": {"Yes": 0.8963686597643059, "yes": 0.09624598701828053}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8289589769940758, "res": {"Yes": 0.8289589769940758, "yes": 0.1670258313139876}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231867643872466, "res": {"Yes": 0.9231867643872466, "yes": 0.07239162232709297}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.933305451804283, "res": {"Yes": 0.933305451804283, "yes": 0.06317136284879947}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9552286768180533, "res": {"Yes": 0.9552286768180533, "yes": 0.041054636575415505}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8335815009399241, "res": {"Yes": 0.8335815009399241, "yes": 0.16329024785284058}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8861374366805864, "res": {"Yes": 0.8861374366805864, "yes": 0.10822834265153393}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8676507908161563, "res": {"Yes": 0.8676507908161563, "yes": 0.12363309634758674}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8793902758555985, "res": {"Yes": 0.8793902758555985, "yes": 0.10993642882746783}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9126735954774585, "res": {"Yes": 0.9126735954774585, "yes": 0.07916294150154382}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9163441456585457, "res": {"Yes": 0.9163441456585457, "yes": 0.07704423820193809}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8387944303482188, "res": {"Yes": 0.8387944303482188, "yes": 0.15690998217677618}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8659996106635494, "res": {"Yes": 0.8659996106635494, "yes": 0.12956891204951504}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9023911955412319, "res": {"Yes": 0.9023911955412319, "yes": 0.0929983246971756}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8514731516740852, "res": {"Yes": 0.8514731516740852, "yes": 0.14149973990892556}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8691709349593619, "res": {"Yes": 0.8691709349593619, "yes": 0.12524783808055814}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8648541145269206, "res": {"Yes": 0.8648541145269206, "yes": 0.12882754879793176}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9519994327897837, "res": {"Yes": 0.9519994327897837, "yes": 0.044172281588441775}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7800902272709916, "res": {"Yes": 0.7800902272709916, "yes": 0.2110247613545693}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9163158310619077, "res": {"Yes": 0.9163158310619077, "yes": 0.07921907208480884}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9068765601380211, "res": {"Yes": 0.9068765601380211, "yes": 0.0878550308171042}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6611764703175966, "res": {"Yes": 0.6611764703175966, "yes": 0.2879511014193382}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.47016007366361295, "res": {"yes": 0.47273947738455735, "Yes": 0.47016007366361295}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5092966412242161, "res": {"Yes": 0.5092966412242161, "yes": 0.42754764778891335}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4805270772126461, "res": {"Yes": 0.4805270772126461, "yes": 0.48030898664661664}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3543005906578172, "res": {"yes": 0.5999376774965639, "Yes": 0.3543005906578172}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8791761703918258, "res": {"Yes": 0.8791761703918258, "yes": 0.11028387010481704}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9907328690022676, "res": {"Yes": 0.9907328690022676, "yes": 0.007735204941208171}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701522316045162, "res": {"Yes": 0.9701522316045162, "yes": 0.02082265088360454}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.977326887493037, "res": {"Yes": 0.977326887493037, "yes": 0.014162919961570678}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9434564690214694, "res": {"Yes": 0.9434564690214694, "yes": 0.0462755284877}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6028360860322058, "res": {"Yes": 0.6028360860322058, "yes": 0.39036926675238703}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8339301359303228, "res": {"Yes": 0.8339301359303228, "yes": 0.1542147758208008}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8383683806884956, "res": {"Yes": 0.8383683806884956, "yes": 0.15713838951723605}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8047970782888892, "res": {"Yes": 0.8047970782888892, "yes": 0.1897915147643145}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8369856610511821, "res": {"Yes": 0.8369856610511821, "yes": 0.15658012800457324}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8827772457697387, "res": {"Yes": 0.8827772457697387, "yes": 0.09709600962979502}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.890143805584549, "res": {"Yes": 0.890143805584549, "yes": 0.09569526110340779}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7837966321884111, "res": {"Yes": 0.7837966321884111, "yes": 0.20066865775972423}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7851246766915524, "res": {"Yes": 0.7851246766915524, "yes": 0.19308779444741805}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7921473472216954, "res": {"Yes": 0.7921473472216954, "yes": 0.18579387847437046}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9613961443344484, "res": {"Yes": 0.9613961443344484, "yes": 0.03322873384866001}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9750781903197592, "res": {"Yes": 0.9750781903197592, "yes": 0.017529317746535582}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9810061542411593, "res": {"Yes": 0.9810061542411593, "yes": 0.015481952318603874}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7831907957042887, "res": {"Yes": 0.7831907957042887, "yes": 0.21311729160941925}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7545815966260407, "res": {"Yes": 0.7545815966260407, "yes": 0.23806155602295184}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8995769335594458, "res": {"Yes": 0.8995769335594458, "yes": 0.09329759019744202}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9490376526751635, "res": {"Yes": 0.9490376526751635, "yes": 0.043583549712272726}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9568861596869247, "res": {"Yes": 0.9568861596869247, "yes": 0.03916253221915187}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9411433312342036, "res": {"Yes": 0.9411433312342036, "yes": 0.052701972251919034}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9499298796561227, "res": {"Yes": 0.9499298796561227, "yes": 0.04092849333212894}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4842046166163427, "res": {"yes": 0.5107610758791016, "Yes": 0.4842046166163427}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6083682110769295, "res": {"Yes": 0.6083682110769295, "yes": 0.38780920499736177}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.527276130958875, "res": {"Yes": 0.527276130958875, "yes": 0.468696350741071}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.63153005202653, "res": {"Yes": 0.63153005202653, "yes": 0.36527017271237033}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6833297769464131, "res": {"Yes": 0.6833297769464131, "yes": 0.31322553149645527}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9100291290850304, "res": {"Yes": 0.9100291290850304, "yes": 0.07881765964945427}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6362647237063095, "res": {"Yes": 0.6362647237063095, "yes": 0.33042016735106217}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6823640744996019, "res": {"Yes": 0.6823640744996019, "yes": 0.13654571663875645}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9279553793026041, "res": {"Yes": 0.9279553793026041, "yes": 0.052094421963233606}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580517302166435, "res": {"Yes": 0.7580517302166435, "yes": 0.20489318180721192}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.757041314814708, "res": {"Yes": 0.757041314814708, "yes": 0.23679705800851317}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8244454091743892, "res": {"Yes": 0.8244454091743892, "yes": 0.16786869207080388}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8231701220555844, "res": {"Yes": 0.8231701220555844, "yes": 0.1719721730242465}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8674017413773957, "res": {"Yes": 0.8674017413773957, "yes": 0.12703210870874007}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8247597476734779, "res": {"Yes": 0.8247597476734779, "yes": 0.1679141066940599}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.733762567660104, "res": {"Yes": 0.733762567660104, "yes": 0.26204985685102383}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8897444378269709, "res": {"Yes": 0.8897444378269709, "yes": 0.10443023898735515}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7655423362941771, "res": {"Yes": 0.7655423362941771, "yes": 0.22571968080532964}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8292443786618163, "res": {"Yes": 0.8292443786618163, "yes": 0.1622282313067727}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8960852279956661, "res": {"Yes": 0.8960852279956661, "yes": 0.09984209410216534}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8280966950118651, "res": {"Yes": 0.8280966950118651, "yes": 0.12917711910856994}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7467991541109995, "res": {"Yes": 0.7467991541109995, "yes": 0.22284961660686595}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7229997157333541, "res": {"Yes": 0.7229997157333541, "yes": 0.19138302356588868}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7978274284452264, "res": {"Yes": 0.7978274284452264, "yes": 0.17771386892827118}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7233809816763606, "res": {"Yes": 0.7233809816763606, "yes": 0.2534619666869604}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.928080211616172, "res": {"Yes": 0.928080211616172, "yes": 0.06823777376081905}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9190939602627158, "res": {"Yes": 0.9190939602627158, "yes": 0.07741373959445352}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9272090040426738, "res": {"Yes": 0.9272090040426738, "yes": 0.06858703886014993}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273085322897182, "res": {"Yes": 0.9273085322897182, "yes": 0.06845265093143495}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8798082614201896, "res": {"Yes": 0.8798082614201896, "yes": 0.11471902512859061}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8234254596303897, "res": {"Yes": 0.8234254596303897, "yes": 0.17143770748824283}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8160744340333745, "res": {"Yes": 0.8160744340333745, "yes": 0.17981448472205683}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8127077379347514, "res": {"Yes": 0.8127077379347514, "yes": 0.18221834739899648}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.859708380225999, "res": {"Yes": 0.859708380225999, "yes": 0.13739928541109844}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8534174848330939, "res": {"Yes": 0.8534174848330939, "yes": 0.1438778936344787}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6603595365083165, "res": {"Yes": 0.6603595365083165, "yes": 0.3323245923134028}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8816990714726575, "res": {"Yes": 0.8816990714726575, "yes": 0.11474846580751619}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6840249673856262, "res": {"Yes": 0.6840249673856262, "yes": 0.3127170158828367}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.970493429128811, "res": {"Yes": 0.970493429128811, "yes": 0.019434465830106922}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7108519704081516, "res": {"Yes": 0.7108519704081516, "yes": 0.2851403681890654}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9906524866751636, "res": {"Yes": 0.9906524866751636, "yes": 0.0047711810745662935}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8199239064843896, "res": {"Yes": 0.8199239064843896, "yes": 0.17099480552288868}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9761989628164486, "res": {"Yes": 0.9761989628164486, "yes": 0.02164042252520834}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8439012024049493, "res": {"Yes": 0.8439012024049493, "yes": 0.1483462664105744}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.801360620796706, "res": {"Yes": 0.801360620796706, "yes": 0.19143208132131143}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6916947523518071, "res": {"Yes": 0.6916947523518071, "yes": 0.3043844616881537}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5912812656394499, "res": {"Yes": 0.5912812656394499, "yes": 0.40290743227816084}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.974101581165495, "res": {"Yes": 0.974101581165495, "yes": 0.018950988220014002}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4900991310861282, "res": {"yes": 0.5058523756442291, "Yes": 0.4900991310861282}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6487200119013103, "res": {"Yes": 0.6487200119013103, "yes": 0.34686643037910775}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7373883736401309, "res": {"Yes": 0.7373883736401309, "yes": 0.2565659839718909}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9528625876201702, "res": {"Yes": 0.9528625876201702, "yes": 0.03972194393662871}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.74653658089142, "res": {"Yes": 0.74653658089142, "yes": 0.24722342414987564}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5605818033602932, "res": {"Yes": 0.5605818033602932, "yes": 0.4336320870914548}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.671655861832897, "res": {"Yes": 0.671655861832897, "yes": 0.32365368768607217}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6801166849623136, "res": {"Yes": 0.6801166849623136, "yes": 0.31431921537400037}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7336111200223361, "res": {"Yes": 0.7336111200223361, "yes": 0.2566721472778257}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8800519316609665, "res": {"Yes": 0.8800519316609665, "yes": 0.11561652304992188}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8281728916941478, "res": {"Yes": 0.8281728916941478, "yes": 0.16470208211738074}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.715807115552788, "res": {"Yes": 0.715807115552788, "yes": 0.2736055522850884}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4373262915126369, "res": {"yes": 0.5593082928890447, "Yes": 0.4373262915126369}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5793324595978897, "res": {"Yes": 0.5793324595978897, "yes": 0.4165538808316537}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9052977172896, "res": {"Yes": 0.9052977172896, "yes": 0.08524265712153002}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49512124847192046, "res": {"yes": 0.49787033865733965, "Yes": 0.49512124847192046}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5003927694670856, "res": {"Yes": 0.5003927694670856, "yes": 0.4934601120606851}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.912081672260695, "res": {"Yes": 0.912081672260695, "yes": 0.06070256650589947}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8805968084124055, "res": {"Yes": 0.8805968084124055, "yes": 0.10961896293000564}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6909515316531176, "res": {"Yes": 0.6909515316531176, "yes": 0.2719796042153521}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8282987421262851, "res": {"Yes": 0.8282987421262851, "yes": 0.13418207938309537}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8395460918842501, "res": {"Yes": 0.8395460918842501, "yes": 0.13075161817686476}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9864413459527117, "res": {"Yes": 0.9864413459527117, "yes": 0.011012194822743324}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5964147647157603, "res": {"Yes": 0.5964147647157603, "yes": 0.3303885806929297}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6827701268301415, "res": {"Yes": 0.6827701268301415, "yes": 0.30833645680831556}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7391050179083206, "res": {"Yes": 0.7391050179083206, "yes": 0.2329713020894926}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8439776211186321, "res": {"Yes": 0.8439776211186321, "yes": 0.14321555586689924}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.884647843600246, "res": {"Yes": 0.884647843600246, "yes": 0.10343649804827773}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9533282252310039, "res": {"Yes": 0.9533282252310039, "yes": 0.03764665399500997}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9459226051641386, "res": {"Yes": 0.9459226051641386, "yes": 0.045048197807545605}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9419612576609535, "res": {"Yes": 0.9419612576609535, "yes": 0.04625284978264007}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.95594872720478, "res": {"Yes": 0.95594872720478, "yes": 0.03654014353058127}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9514149637216099, "res": {"Yes": 0.9514149637216099, "yes": 0.024159538691323184}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9050154532623349, "res": {"Yes": 0.9050154532623349, "yes": 0.08915833009025041}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8908662134858362, "res": {"Yes": 0.8908662134858362, "yes": 0.10429459917037213}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489355616450503, "res": {"Yes": 0.9489355616450503, "yes": 0.04731411091322658}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9578615272874355, "res": {"Yes": 0.9578615272874355, "yes": 0.038649304458876016}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7987022867283321, "res": {"Yes": 0.7987022867283321, "yes": 0.18782988365002132}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8211834561378871, "res": {"Yes": 0.8211834561378871, "yes": 0.1654384924616819}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7989791888905065, "res": {"Yes": 0.7989791888905065, "yes": 0.18906840465719385}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8742695073916751, "res": {"Yes": 0.8742695073916751, "yes": 0.11935457431930507}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9706808397980222, "res": {"Yes": 0.9706808397980222, "yes": 0.020958340443037154}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.932812581118419, "res": {"Yes": 0.932812581118419, "yes": 0.06420032538479999}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8884526543292363, "res": {"Yes": 0.8884526543292363, "yes": 0.10033406606044643}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9477719544903728, "res": {"Yes": 0.9477719544903728, "yes": 0.04859689203585969}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9462015118254593, "res": {"Yes": 0.9462015118254593, "yes": 0.04653525879788423}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8340603142447425, "res": {"Yes": 0.8340603142447425, "yes": 0.15149323628694344}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8813087239230575, "res": {"Yes": 0.8813087239230575, "yes": 0.10980125720695383}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9115506305491271, "res": {"Yes": 0.9115506305491271, "yes": 0.0738857128922111}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9229086685733289, "res": {"Yes": 0.9229086685733289, "yes": 0.06606694056340574}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8231406448613059, "res": {"Yes": 0.8231406448613059, "yes": 0.16353526889684308}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8917036344350968, "res": {"Yes": 0.8917036344350968, "yes": 0.10013496028894522}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8501500293120182, "res": {"Yes": 0.8501500293120182, "yes": 0.14627269871855664}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8707730401199615, "res": {"Yes": 0.8707730401199615, "yes": 0.1210718025353577}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8855351087868026, "res": {"Yes": 0.8855351087868026, "yes": 0.10604659563451563}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.92905047409747, "res": {"Yes": 0.92905047409747, "yes": 0.06459314424911836}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.849814396839902, "res": {"Yes": 0.849814396839902, "yes": 0.14437180901657934}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9930544237867655, "res": {"Yes": 0.9930544237867655, "yes": 0.00452427923319181}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.718046578123293, "res": {"Yes": 0.718046578123293, "yes": 0.08903555452317982}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9927260697228378, "res": {"Yes": 0.9927260697228378, "yes": 0.006217098252347793}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.711650684227254, "res": {"Yes": 0.711650684227254, "yes": 0.2150188887633968}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8439808451193026, "res": {"Yes": 0.8439808451193026, "yes": 0.12158644350485315}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9395997427350928, "res": {"Yes": 0.9395997427350928, "yes": 0.05661999996027448}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9560212845547215, "res": {"Yes": 0.9560212845547215, "yes": 0.03998513175872046}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9585752801879182, "res": {"Yes": 0.9585752801879182, "yes": 0.03783999753277379}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9538597127230427, "res": {"Yes": 0.9538597127230427, "yes": 0.041737565682871486}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9470527927403407, "res": {"Yes": 0.9470527927403407, "yes": 0.0490177157644661}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6575939732815506, "res": {"Yes": 0.6575939732815506, "yes": 0.32812521184946253}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8207628551799795, "res": {"Yes": 0.8207628551799795, "yes": 0.17007468753016552}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7543374082656109, "res": {"Yes": 0.7543374082656109, "yes": 0.24197581399244075}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8324623777838099, "res": {"Yes": 0.8324623777838099, "yes": 0.16054220684275142}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.39298278584237784, "res": {"yes": 0.6007784488086917, "Yes": 0.39298278584237784}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5498062145625175, "res": {"Yes": 0.5498062145625175, "yes": 0.44161363859350766}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5706199372279853, "res": {"Yes": 0.5706199372279853, "yes": 0.4232943834622729}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5529552516458703, "res": {"Yes": 0.5529552516458703, "yes": 0.44149513864294543}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5688615079498612, "res": {"Yes": 0.5688615079498612, "yes": 0.4258999582843616}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5208269563097121, "res": {"Yes": 0.5208269563097121, "yes": 0.47346639043744665}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9722670387783225, "res": {"Yes": 0.9722670387783225, "yes": 0.02129735057827108}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772729180605196, "res": {"Yes": 0.9772729180605196, "yes": 0.017237557502029488}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9351814316240049, "res": {"Yes": 0.9351814316240049, "yes": 0.059989064002357005}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7411690959922574, "res": {"Yes": 0.7411690959922574, "yes": 0.2552978694626586}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9008592670199319, "res": {"Yes": 0.9008592670199319, "yes": 0.08683376806175586}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8306898074874287, "res": {"Yes": 0.8306898074874287, "yes": 0.16278447375776614}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8354485940574541, "res": {"Yes": 0.8354485940574541, "yes": 0.15888015549359252}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9064274546086117, "res": {"Yes": 0.9064274546086117, "yes": 0.08898188226761371}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9297289239006624, "res": {"Yes": 0.9297289239006624, "yes": 0.06718825657986396}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7327255627488553, "res": {"Yes": 0.7327255627488553, "yes": 0.2583330848934852}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8579535744420685, "res": {"Yes": 0.8579535744420685, "yes": 0.1392551662951677}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6910702818771529, "res": {"Yes": 0.6910702818771529, "yes": 0.30433129036734685}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8665032405657013, "res": {"Yes": 0.8665032405657013, "yes": 0.12901685812748068}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8149337478721339, "res": {"Yes": 0.8149337478721339, "yes": 0.1791589282041326}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8301488972530003, "res": {"Yes": 0.8301488972530003, "yes": 0.16612211974120147}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9327313609318653, "res": {"Yes": 0.9327313609318653, "yes": 0.06430085826531758}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9902925167816413, "res": {"Yes": 0.9902925167816413, "yes": 0.008298636954624057}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996497778633861, "res": {"Yes": 0.996497778633861, "yes": 0.002085056553746883}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965171975752811, "res": {"Yes": 0.9965171975752811, "yes": 0.0020414749860715254}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926136442034388, "res": {"Yes": 0.9926136442034388, "yes": 0.005103285658117297}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9606838983864183, "res": {"Yes": 0.9606838983864183, "yes": 0.034227444019475944}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9257712361972448, "res": {"Yes": 0.9257712361972448, "yes": 0.06659864812934443}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9331030108779679, "res": {"Yes": 0.9331030108779679, "yes": 0.06160454060740653}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7860920196967126, "res": {"Yes": 0.7860920196967126, "yes": 0.20854846890225698}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4388400425818966, "res": {"yes": 0.5516957843129666, "Yes": 0.4388400425818966}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8763160898221986, "res": {"Yes": 0.8763160898221986, "yes": 0.04792353304061869}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9151481721875245, "res": {"Yes": 0.9151481721875245, "yes": 0.06415741539365624}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9045163210780186, "res": {"Yes": 0.9045163210780186, "yes": 0.08384245306093067}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8836720405792061, "res": {"Yes": 0.8836720405792061, "yes": 0.0754879679747416}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9258236548485963, "res": {"Yes": 0.9258236548485963, "yes": 0.06546576138217551}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8052117038413851, "res": {"Yes": 0.8052117038413851, "yes": 0.1846012657905506}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7687210646880265, "res": {"Yes": 0.7687210646880265, "yes": 0.22741280244955114}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9803853150201283, "res": {"Yes": 0.9803853150201283, "yes": 0.01782328910816595}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9818222845191649, "res": {"Yes": 0.9818222845191649, "yes": 0.016338343037269675}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8250519458194573, "res": {"Yes": 0.8250519458194573, "yes": 0.17147683403726108}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.27720582449511394, "res": {"yes": 0.6022740528319416, "Yes": 0.27720582449511394}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8247038061170726, "res": {"Yes": 0.8247038061170726, "yes": 0.1695942431315943}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6621164977852272, "res": {"Yes": 0.6621164977852272, "yes": 0.3305328598411064}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5897902581185024, "res": {"Yes": 0.5897902581185024, "yes": 0.40230903577009}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7385182980490884, "res": {"Yes": 0.7385182980490884, "yes": 0.25476207718877103}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8323033843301827, "res": {"Yes": 0.8323033843301827, "yes": 0.16256027872431772}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.732038112587402, "res": {"Yes": 0.732038112587402, "yes": 0.26279760652617734}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.538703344957951, "res": {"Yes": 0.538703344957951, "yes": 0.4569871530367623}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9730396834428516, "res": {"Yes": 0.9730396834428516, "yes": 0.023589659965871344}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.637418438335781, "res": {"Yes": 0.637418438335781, "yes": 0.3579578632241086}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9583087374669811, "res": {"Yes": 0.9583087374669811, "yes": 0.027557154103925213}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.911453659482019, "res": {"Yes": 0.911453659482019, "yes": 0.08493919766878956}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7757347222612601, "res": {"Yes": 0.7757347222612601, "yes": 0.2174318958533112}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.610423768835622, "res": {"Yes": 0.610423768835622, "yes": 0.38204552579607387}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7767590963464627, "res": {"Yes": 0.7767590963464627, "yes": 0.2173199037921997}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9861850564140792, "res": {"Yes": 0.9861850564140792, "yes": 0.00928232987109186}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816984315231233, "res": {"Yes": 0.9816984315231233, "yes": 0.015029550237244886}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8619399364831865, "res": {"Yes": 0.8619399364831865, "yes": 0.12849624459699857}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.974558554625252, "res": {"Yes": 0.974558554625252, "yes": 0.012000223552411931}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9414081968196955, "res": {"Yes": 0.9414081968196955, "yes": 0.0523902276644883}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8693291036924421, "res": {"Yes": 0.8693291036924421, "yes": 0.1214695092949829}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8419005949191791, "res": {"Yes": 0.8419005949191791, "yes": 0.15242850691101203}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8407416836462793, "res": {"Yes": 0.8407416836462793, "yes": 0.15261972637099888}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8498991190600318, "res": {"Yes": 0.8498991190600318, "yes": 0.14109694100256678}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8209011653733839, "res": {"Yes": 0.8209011653733839, "yes": 0.17225757384197782}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5858970602139291, "res": {"Yes": 0.5858970602139291, "yes": 0.3793377385432449}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7878485445068354, "res": {"Yes": 0.7878485445068354, "yes": 0.2047181753655086}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8112077438699631, "res": {"Yes": 0.8112077438699631, "yes": 0.17447076276537266}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7983808178599666, "res": {"Yes": 0.7983808178599666, "yes": 0.19049159616650155}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6137973648373491, "res": {"Yes": 0.6137973648373491, "yes": 0.3769883476884441}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7433944699564223, "res": {"Yes": 0.7433944699564223, "yes": 0.2503966546172985}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6680686976098023, "res": {"Yes": 0.6680686976098023, "yes": 0.32095151849004727}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7535140912716541, "res": {"Yes": 0.7535140912716541, "yes": 0.23935152302327084}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8938124297007406, "res": {"Yes": 0.8938124297007406, "yes": 0.09847048700649162}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7843276539357448, "res": {"Yes": 0.7843276539357448, "yes": 0.21062876556118948}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9818213674975794, "res": {"Yes": 0.9818213674975794, "yes": 0.015123521982582384}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9860187217339264, "res": {"Yes": 0.9860187217339264, "yes": 0.012006621376434006}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9882166340861525, "res": {"Yes": 0.9882166340861525, "yes": 0.008333022862300728}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9863091243253899, "res": {"Yes": 0.9863091243253899, "yes": 0.011196556537086568}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9825978856903559, "res": {"Yes": 0.9825978856903559, "yes": 0.014779103819140495}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9788868453509593, "res": {"Yes": 0.9788868453509593, "yes": 0.013375053315532152}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9840198513472447, "res": {"Yes": 0.9840198513472447, "yes": 0.012677861444418546}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9563949056513839, "res": {"Yes": 0.9563949056513839, "yes": 0.03634681860557263}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869476998102406, "res": {"Yes": 0.9869476998102406, "yes": 0.011057622166387324}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.982162781588657, "res": {"Yes": 0.982162781588657, "yes": 0.009897560166926904}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8553788746090178, "res": {"Yes": 0.8553788746090178, "yes": 0.13627167166437826}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9666815918017113, "res": {"Yes": 0.9666815918017113, "yes": 0.016123216936433055}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8750610269245215, "res": {"Yes": 0.8750610269245215, "yes": 0.119516959162752}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9737593030544428, "res": {"Yes": 0.9737593030544428, "yes": 0.015794776419062594}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8748703806480778, "res": {"Yes": 0.8748703806480778, "yes": 0.11916182925553206}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8837332988482461, "res": {"Yes": 0.8837332988482461, "yes": 0.10155825543209719}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.910843341260241, "res": {"Yes": 0.910843341260241, "yes": 0.08462207606633275}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7907675364961875, "res": {"Yes": 0.7907675364961875, "yes": 0.18444314872082837}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9005533781486591, "res": {"Yes": 0.9005533781486591, "yes": 0.09305725780096094}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9387370180257, "res": {"Yes": 0.9387370180257, "yes": 0.05516641691852628}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9286069249640398, "res": {"Yes": 0.9286069249640398, "yes": 0.06117299163651467}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5418083163314378, "res": {"Yes": 0.5418083163314378, "yes": 0.45044334955335186}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9708651954181501, "res": {"Yes": 0.9708651954181501, "yes": 0.025334887007526743}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9698843785116059, "res": {"Yes": 0.9698843785116059, "yes": 0.02231202072020348}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6645162344840724, "res": {"Yes": 0.6645162344840724, "yes": 0.3224073083587655}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.24168160522062201, "res": {"yes": 0.7187513993145892, "Yes": 0.24168160522062201}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6807909261694692, "res": {"Yes": 0.6807909261694692, "yes": 0.31546816670983846}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8179803783616801, "res": {"Yes": 0.8179803783616801, "yes": 0.17790104682189076}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6462248634056958, "res": {"Yes": 0.6462248634056958, "yes": 0.3496436800747172}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3543535979910292, "res": {"yes": 0.6411896371675502, "Yes": 0.3543535979910292}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9548994519123658, "res": {"Yes": 0.9548994519123658, "yes": 0.03896670949776787}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7268360810355842, "res": {"Yes": 0.7268360810355842, "yes": 0.2667014262605184}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9595559822543254, "res": {"Yes": 0.9595559822543254, "yes": 0.028647459883893094}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5143225660253635, "res": {"Yes": 0.5143225660253635, "yes": 0.47497163144722565}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7183192865430819, "res": {"Yes": 0.7183192865430819, "yes": 0.2726689928088738}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9217012286108062, "res": {"Yes": 0.9217012286108062, "yes": 0.0706627089479596}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9023082064392903, "res": {"Yes": 0.9023082064392903, "yes": 0.07960522082729633}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8926013226440271, "res": {"Yes": 0.8926013226440271, "yes": 0.07794629617513464}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8935850602294654, "res": {"Yes": 0.8935850602294654, "yes": 0.09901301231098494}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9232626626909038, "res": {"Yes": 0.9232626626909038, "yes": 0.06753067769365091}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9095435615674745, "res": {"Yes": 0.9095435615674745, "yes": 0.07700832084844629}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9155212992340228, "res": {"Yes": 0.9155212992340228, "yes": 0.07845343616980409}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9475641593163681, "res": {"Yes": 0.9475641593163681, "yes": 0.043790775009671164}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8917050468947725, "res": {"Yes": 0.8917050468947725, "yes": 0.09368290034890654}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9009799217755117, "res": {"Yes": 0.9009799217755117, "yes": 0.08386725732691618}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.446131628205013, "res": {"yes": 0.5322008091780105, "Yes": 0.446131628205013}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5870284462126559, "res": {"Yes": 0.5870284462126559, "yes": 0.3846762144357771}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.861244450686367, "res": {"Yes": 0.861244450686367, "yes": 0.12573666743462622}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7596884606039478, "res": {"Yes": 0.7596884606039478, "yes": 0.20435354938022215}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6057202398007687, "res": {"Yes": 0.6057202398007687, "yes": 0.3734689165353483}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.818753833423227, "res": {"Yes": 0.818753833423227, "yes": 0.1725812567152186}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9042514993833171, "res": {"Yes": 0.9042514993833171, "yes": 0.09448747433378202}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8534767994094474, "res": {"Yes": 0.8534767994094474, "yes": 0.1353322788266739}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8276523002709469, "res": {"Yes": 0.8276523002709469, "yes": 0.16507832842452067}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913735710119423, "res": {"Yes": 0.9913735710119423, "yes": 0.005976251781065961}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8109302122897517, "res": {"Yes": 0.8109302122897517, "yes": 0.18311377463704404}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8198631441752553, "res": {"Yes": 0.8198631441752553, "yes": 0.17427366987025208}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7956118879458604, "res": {"Yes": 0.7956118879458604, "yes": 0.19928175527159236}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981114583976224, "res": {"Yes": 0.981114583976224, "yes": 0.01558151505010539}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7174731526197484, "res": {"Yes": 0.7174731526197484, "yes": 0.2765648905557071}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8547447095093882, "res": {"Yes": 0.8547447095093882, "yes": 0.14168841398854295}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8970941288737123, "res": {"Yes": 0.8970941288737123, "yes": 0.10062450122760425}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7215797307093358, "res": {"Yes": 0.7215797307093358, "yes": 0.27452773085056986}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7976155536184538, "res": {"Yes": 0.7976155536184538, "yes": 0.1929719765196025}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7105675230500152, "res": {"Yes": 0.7105675230500152, "yes": 0.28543783911048964}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8559329203398336, "res": {"Yes": 0.8559329203398336, "yes": 0.14041194677786872}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7938392136035533, "res": {"Yes": 0.7938392136035533, "yes": 0.1960424032574832}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7913914292087963, "res": {"Yes": 0.7913914292087963, "yes": 0.19926045319057034}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8338721130901029, "res": {"Yes": 0.8338721130901029, "yes": 0.16113694266065975}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9143350639052314, "res": {"Yes": 0.9143350639052314, "yes": 0.08138972031539952}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.741941447984872, "res": {"Yes": 0.741941447984872, "yes": 0.2532387143813283}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7664831683585671, "res": {"Yes": 0.7664831683585671, "yes": 0.22767623070697174}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8931161226477751, "res": {"Yes": 0.8931161226477751, "yes": 0.0993553759723869}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7857776693411411, "res": {"Yes": 0.7857776693411411, "yes": 0.204761580219438}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8672729591278667, "res": {"Yes": 0.8672729591278667, "yes": 0.1255780250560632}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8860953231135233, "res": {"Yes": 0.8860953231135233, "yes": 0.111099242560173}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7332260019448127, "res": {"Yes": 0.7332260019448127, "yes": 0.2531772861207686}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7545524929751265, "res": {"Yes": 0.7545524929751265, "yes": 0.2395872327128758}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7541666456074418, "res": {"Yes": 0.7541666456074418, "yes": 0.24189058092785712}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7088370001068579, "res": {"Yes": 0.7088370001068579, "yes": 0.2867633407727332}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9874651440444472, "res": {"Yes": 0.9874651440444472, "yes": 0.010927405272391492}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9777970439562657, "res": {"Yes": 0.9777970439562657, "yes": 0.019324282756440197}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883016194297056, "res": {"Yes": 0.9883016194297056, "yes": 0.009857149002483802}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7904314763690482, "res": {"Yes": 0.7904314763690482, "yes": 0.20610455302305808}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9787624631658361, "res": {"Yes": 0.9787624631658361, "yes": 0.018234658183050173}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8640674293449272, "res": {"Yes": 0.8640674293449272, "yes": 0.13426304242937914}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9602342383516751, "res": {"Yes": 0.9602342383516751, "yes": 0.033988236483917245}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9367817013819929, "res": {"Yes": 0.9367817013819929, "yes": 0.05926573290223083}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9545088538077546, "res": {"Yes": 0.9545088538077546, "yes": 0.03880502230030995}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7674892163329604, "res": {"Yes": 0.7674892163329604, "yes": 0.22385557265733552}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8867804252056602, "res": {"Yes": 0.8867804252056602, "yes": 0.10639126513929857}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7940879465118794, "res": {"Yes": 0.7940879465118794, "yes": 0.19706483356426238}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6742299527973629, "res": {"Yes": 0.6742299527973629, "yes": 0.31486899989657885}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9750688988439529, "res": {"Yes": 0.9750688988439529, "yes": 0.019167203721257117}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7801633641580363, "res": {"Yes": 0.7801633641580363, "yes": 0.21275823603713478}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6170377955399268, "res": {"Yes": 0.6170377955399268, "yes": 0.25882321963435023}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5128305501370254, "res": {"Yes": 0.5128305501370254, "yes": 0.4253692515896056}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6709284228869347, "res": {"Yes": 0.6709284228869347, "yes": 0.23149900581516367}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.362843560651801, "res": {"yes": 0.4992740378117574, "Yes": 0.362843560651801}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.38622638183210645, "res": {"yes": 0.5550182960073794, "Yes": 0.38622638183210645}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8709067662952352, "res": {"Yes": 0.8709067662952352, "yes": 0.12511007865078008}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7085810285878386, "res": {"Yes": 0.7085810285878386, "yes": 0.2871887252685913}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5955586537945929, "res": {"Yes": 0.5955586537945929, "yes": 0.3989935330972248}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7652179528861525, "res": {"Yes": 0.7652179528861525, "yes": 0.22606170183571783}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8139025154832233, "res": {"Yes": 0.8139025154832233, "yes": 0.18059789385494587}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9564423440151658, "res": {"Yes": 0.9564423440151658, "yes": 0.040541619117160706}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7392926857118102, "res": {"Yes": 0.7392926857118102, "yes": 0.25532490693864374}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8020711173877118, "res": {"Yes": 0.8020711173877118, "yes": 0.18969618745552197}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8871968353153176, "res": {"Yes": 0.8871968353153176, "yes": 0.1084808737871439}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6635881353799226, "res": {"Yes": 0.6635881353799226, "yes": 0.3273612754686958}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6858641931663446, "res": {"Yes": 0.6858641931663446, "yes": 0.24211276264711862}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5017632921886926, "res": {"Yes": 0.5017632921886926, "yes": 0.46436773886177846}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7961126683506168, "res": {"Yes": 0.7961126683506168, "yes": 0.15401706053648662}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.615371886766534, "res": {"Yes": 0.615371886766534, "yes": 0.32940776116342485}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.32665039776043725, "res": {"yes": 0.6589156168150577, "Yes": 0.32665039776043725}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9802454377405714, "res": {"Yes": 0.9802454377405714, "yes": 0.014311669865693752}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6879655603035846, "res": {"Yes": 0.6879655603035846, "yes": 0.3049627624976428}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9714400478303782, "res": {"Yes": 0.9714400478303782, "yes": 0.02235008845974426}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9705064503265021, "res": {"Yes": 0.9705064503265021, "yes": 0.025396416438741524}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5999935342944325, "res": {"Yes": 0.5999935342944325, "yes": 0.3900056900863022}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.643941400519525, "res": {"Yes": 0.643941400519525, "yes": 0.34964375000346015}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6294863119846444, "res": {"Yes": 0.6294863119846444, "yes": 0.36351944122998603}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5593336972486364, "res": {"Yes": 0.5593336972486364, "yes": 0.4314856608281671}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5292373505295391, "res": {"Yes": 0.5292373505295391, "yes": 0.40855478543603907}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.690552808788972, "res": {"Yes": 0.690552808788972, "yes": 0.2756943650121599}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9257544752607423, "res": {"Yes": 0.9257544752607423, "yes": 0.07004477489913093}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9059346778878347, "res": {"Yes": 0.9059346778878347, "yes": 0.08544067530079907}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9232759808508701, "res": {"Yes": 0.9232759808508701, "yes": 0.06021686598757133}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8736849302711097, "res": {"Yes": 0.8736849302711097, "yes": 0.11991219648242935}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8673313372354388, "res": {"Yes": 0.8673313372354388, "yes": 0.12396719559875469}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4947231330831261, "res": {"yes": 0.4963828872284739, "Yes": 0.4947231330831261}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6632788426929357, "res": {"Yes": 0.6632788426929357, "yes": 0.3303863340582196}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9703241481089272, "res": {"Yes": 0.9703241481089272, "yes": 0.01953490135778019}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8012815384194236, "res": {"Yes": 0.8012815384194236, "yes": 0.1919628913217765}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6284637686175907, "res": {"Yes": 0.6284637686175907, "yes": 0.36131410572670025}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8649118281675772, "res": {"Yes": 0.8649118281675772, "yes": 0.12625160475206562}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8704343143515323, "res": {"Yes": 0.8704343143515323, "yes": 0.12084131987771751}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7481980375249196, "res": {"Yes": 0.7481980375249196, "yes": 0.21034995139771406}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8944333318513951, "res": {"Yes": 0.8944333318513951, "yes": 0.09540421682355354}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8830395887603726, "res": {"Yes": 0.8830395887603726, "yes": 0.1098490421288205}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7332628621424038, "res": {"Yes": 0.7332628621424038, "yes": 0.2583625623801302}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8776089954646337, "res": {"Yes": 0.8776089954646337, "yes": 0.12075659149367315}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7796776764689124, "res": {"Yes": 0.7796776764689124, "yes": 0.21258482368129575}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8476900644782901, "res": {"Yes": 0.8476900644782901, "yes": 0.1433340727865523}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8033844359283193, "res": {"Yes": 0.8033844359283193, "yes": 0.18991062732237993}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9862881299728131, "res": {"Yes": 0.9862881299728131, "yes": 0.011824856263317553}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9916310473046446, "res": {"Yes": 0.9916310473046446, "yes": 0.005486637984650228}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8543779574275329, "res": {"Yes": 0.8543779574275329, "yes": 0.14352682894045687}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9879688451306757, "res": {"Yes": 0.9879688451306757, "yes": 0.009975796198431289}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824363166485353, "res": {"Yes": 0.9824363166485353, "yes": 0.013581142898135232}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9621375963457769, "res": {"Yes": 0.9621375963457769, "yes": 0.034232845536310695}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9441390320190753, "res": {"Yes": 0.9441390320190753, "yes": 0.05091725575546684}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9142024345950853, "res": {"Yes": 0.9142024345950853, "yes": 0.08320450383399161}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9573691645800935, "res": {"Yes": 0.9573691645800935, "yes": 0.03668627030674476}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9636087767414033, "res": {"Yes": 0.9636087767414033, "yes": 0.03360305142218687}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7997566620672176, "res": {"Yes": 0.7997566620672176, "yes": 0.19579842359817126}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6270193610238753, "res": {"Yes": 0.6270193610238753, "yes": 0.35925986299315255}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.739373354337545, "res": {"Yes": 0.739373354337545, "yes": 0.25024691213094946}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7422268758125443, "res": {"Yes": 0.7422268758125443, "yes": 0.24898547937024734}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6096270757473484, "res": {"Yes": 0.6096270757473484, "yes": 0.3839655553603808}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9884113260097084, "res": {"Yes": 0.9884113260097084, "yes": 0.006939892461809348}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9801960718629285, "res": {"Yes": 0.9801960718629285, "yes": 0.016725646817894315}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8985207760387319, "res": {"Yes": 0.8985207760387319, "yes": 0.09767635164000694}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7811514264782408, "res": {"Yes": 0.7811514264782408, "yes": 0.21290560085157825}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8501619059908861, "res": {"Yes": 0.8501619059908861, "yes": 0.14101933081814677}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8072213426039253, "res": {"Yes": 0.8072213426039253, "yes": 0.18483992476645172}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.829215687302678, "res": {"Yes": 0.829215687302678, "yes": 0.1669491840493074}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.869768886782899, "res": {"Yes": 0.869768886782899, "yes": 0.12436871716942757}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8672813023338644, "res": {"Yes": 0.8672813023338644, "yes": 0.1293543165268069}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8328081628802565, "res": {"Yes": 0.8328081628802565, "yes": 0.16149466030386275}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9203207209141472, "res": {"Yes": 0.9203207209141472, "yes": 0.07269342781306723}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8878974649992685, "res": {"Yes": 0.8878974649992685, "yes": 0.10710009655373881}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9454180276480659, "res": {"Yes": 0.9454180276480659, "yes": 0.04856193914784921}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8436173196042622, "res": {"Yes": 0.8436173196042622, "yes": 0.1475232187996425}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9000538357021366, "res": {"Yes": 0.9000538357021366, "yes": 0.09087352266880665}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7687999933314706, "res": {"Yes": 0.7687999933314706, "yes": 0.22202399885546847}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.44766768337567897, "res": {"yes": 0.5461846683497924, "Yes": 0.44766768337567897}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6374678083056575, "res": {"Yes": 0.6374678083056575, "yes": 0.3592993118918141}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6370147934839302, "res": {"Yes": 0.6370147934839302, "yes": 0.3554604811688582}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6531065715732813, "res": {"Yes": 0.6531065715732813, "yes": 0.33962048683338647}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9737394025389203, "res": {"Yes": 0.9737394025389203, " Yes": 0.011132498004704582}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8646726179922762, "res": {"Yes": 0.8646726179922762, "yes": 0.13249123323618747}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9646909943800251, "res": {"Yes": 0.9646909943800251, "yes": 0.027454900565069534}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.79900801657969, "res": {"Yes": 0.79900801657969, "yes": 0.1959624342717366}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7808761425524626, "res": {"Yes": 0.7808761425524626, "yes": 0.21406270347198136}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8292825911231935, "res": {"Yes": 0.8292825911231935, "yes": 0.16266002378797073}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9321160414998314, "res": {"Yes": 0.9321160414998314, "yes": 0.06531960879184477}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9031741563374308, "res": {"Yes": 0.9031741563374308, "yes": 0.09261875685617599}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8592432319311027, "res": {"Yes": 0.8592432319311027, "yes": 0.13523883709550502}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9101510629559129, "res": {"Yes": 0.9101510629559129, "yes": 0.08723483882914837}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9769065572905806, "res": {"Yes": 0.9769065572905806, "yes": 0.0127333811093699}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9373536537401833, "res": {"Yes": 0.9373536537401833, "yes": 0.04559850263284299}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.846501369137781, "res": {"Yes": 0.846501369137781, "yes": 0.15050770501360422}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7005177321245828, "res": {"Yes": 0.7005177321245828, "yes": 0.2937493283483748}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8775424313468038, "res": {"Yes": 0.8775424313468038, "yes": 0.1198585118217371}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7230845502667217, "res": {"Yes": 0.7230845502667217, "yes": 0.2726911344300275}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7498087762206693, "res": {"Yes": 0.7498087762206693, "yes": 0.24287144707671837}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7410821841885437, "res": {"Yes": 0.7410821841885437, "yes": 0.25398336658340537}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7264002264153222, "res": {"Yes": 0.7264002264153222, "yes": 0.2684302433744167}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7087275357736148, "res": {"Yes": 0.7087275357736148, "yes": 0.2857020769738131}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6962362267381564, "res": {"Yes": 0.6962362267381564, "yes": 0.2828889107281961}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5838536073052559, "res": {"Yes": 0.5838536073052559, "yes": 0.39741116664560505}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7160649097894453, "res": {"Yes": 0.7160649097894453, "yes": 0.268896158981131}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7585158605476308, "res": {"Yes": 0.7585158605476308, "yes": 0.22645976223892014}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5498831928209087, "res": {"Yes": 0.5498831928209087, "yes": 0.43617523554161725}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7428241781647554, "res": {"Yes": 0.7428241781647554, "yes": 0.2537637405858966}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8108535505713383, "res": {"Yes": 0.8108535505713383, "yes": 0.18597130339394655}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7116090396477452, "res": {"Yes": 0.7116090396477452, "yes": 0.2855123481094988}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8937739527976564, "res": {"Yes": 0.8937739527976564, "yes": 0.10372613193253975}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8588614347183645, "res": {"Yes": 0.8588614347183645, "yes": 0.13764762519920395}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7044394209894801, "res": {"Yes": 0.7044394209894801, "yes": 0.2914552212250467}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8808806969320965, "res": {"Yes": 0.8808806969320965, "yes": 0.11291845025103087}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8250973084224449, "res": {"Yes": 0.8250973084224449, "yes": 0.16792196525814215}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.864534929841571, "res": {"Yes": 0.864534929841571, "yes": 0.12828796696867178}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.817421537017918, "res": {"Yes": 0.817421537017918, "yes": 0.17512664168635453}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9709665638904733, "res": {"Yes": 0.9709665638904733, "yes": 0.026459937033089284}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8627529307701999, "res": {"Yes": 0.8627529307701999, "yes": 0.13310859235897138}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9292746252698686, "res": {"Yes": 0.9292746252698686, "yes": 0.06715107145481365}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9203245586595551, "res": {"Yes": 0.9203245586595551, "yes": 0.07608479781275501}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8960131587590817, "res": {"Yes": 0.8960131587590817, "yes": 0.09881053997591943}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5422640982902237, "res": {"Yes": 0.5422640982902237, "yes": 0.44926857836245787}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7570985568725299, "res": {"Yes": 0.7570985568725299, "yes": 0.23672418309008345}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39876568110545607, "res": {"yes": 0.5968100442837782, "Yes": 0.39876568110545607}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6169076266378242, "res": {"Yes": 0.6169076266378242, "yes": 0.3777242677264927}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6094331964033002, "res": {"Yes": 0.6094331964033002, "yes": 0.3864704767616166}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.745593823184965, "res": {"Yes": 0.745593823184965, "yes": 0.25004984524840984}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8146178918817559, "res": {"Yes": 0.8146178918817559, "yes": 0.18040205140385554}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7986929259923857, "res": {"Yes": 0.7986929259923857, "yes": 0.19518828932790827}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8396656013802579, "res": {"Yes": 0.8396656013802579, "yes": 0.15428011503770764}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7125725125626482, "res": {"Yes": 0.7125725125626482, "yes": 0.27969641392613914}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.710130658372997, "res": {"Yes": 0.710130658372997, "yes": 0.28418165926713}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8763657958804637, "res": {"Yes": 0.8763657958804637, "yes": 0.12038883294923465}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6066758623345313, "res": {"Yes": 0.6066758623345313, "yes": 0.3879841851935521}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8342340921309541, "res": {"Yes": 0.8342340921309541, "yes": 0.1633132080757545}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8186745818879879, "res": {"Yes": 0.8186745818879879, "yes": 0.17683581071428311}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7708702403784251, "res": {"Yes": 0.7708702403784251, "yes": 0.1937547102648117}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7101393930338131, "res": {"Yes": 0.7101393930338131, "yes": 0.25428802441868614}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989309480163996, "res": {"Yes": 0.989309480163996, "yes": 0.008192186048044028}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6879562590720857, "res": {"Yes": 0.6879562590720857, "yes": 0.23367881311954575}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9014372274399637, "res": {"Yes": 0.9014372274399637, "yes": 0.07387264366560764}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9781721616818931, "res": {"Yes": 0.9781721616818931, "yes": 0.01819670946266724}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5511663774301286, "res": {"Yes": 0.5511663774301286, "yes": 0.4278253617341572}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.953377300017802, "res": {"Yes": 0.953377300017802, "yes": 0.04196260560614887}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7638776321398459, "res": {"Yes": 0.7638776321398459, "yes": 0.2131458086107815}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5780326060786919, "res": {"Yes": 0.5780326060786919, "yes": 0.28349204958484464}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6315436426994847, "res": {"Yes": 0.6315436426994847, "yes": 0.35877565581444204}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6620270916102071, "res": {"Yes": 0.6620270916102071, "yes": 0.3289869811409072}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6547441043919215, "res": {"Yes": 0.6547441043919215, "yes": 0.33992563699978295}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9621399151001782, "res": {"Yes": 0.9621399151001782, "yes": 0.030916569779015364}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9689623425732816, "res": {"Yes": 0.9689623425732816, "yes": 0.024051483508203048}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7790723281413048, "res": {"Yes": 0.7790723281413048, "yes": 0.217024137263158}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7589399666784604, "res": {"Yes": 0.7589399666784604, "yes": 0.2380275156543849}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8211529990083298, "res": {"Yes": 0.8211529990083298, "yes": 0.17496617665430514}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.916796610616525, "res": {"Yes": 0.916796610616525, "yes": 0.08193541205083602}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9067880397031346, "res": {"Yes": 0.9067880397031346, "yes": 0.09098359167382208}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6853394609775915, "res": {"Yes": 0.6853394609775915, "yes": 0.30165276737856134}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6816933879602195, "res": {"Yes": 0.6816933879602195, "yes": 0.30077704589732124}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6967760677306684, "res": {"Yes": 0.6967760677306684, "yes": 0.21746610058066024}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.934867098909565, "res": {"Yes": 0.934867098909565, "yes": 0.06147732938597162}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7593295546290774, "res": {"Yes": 0.7593295546290774, "yes": 0.22915918177233446}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7939389975251002, "res": {"Yes": 0.7939389975251002, "yes": 0.20091983124742177}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873461379671146, "res": {"Yes": 0.9873461379671146, "yes": 0.00853750387675352}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.843214934211908, "res": {"Yes": 0.843214934211908, "yes": 0.1472481087994723}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7126768408151292, "res": {"Yes": 0.7126768408151292, "yes": 0.28194749875622094}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7773528603317406, "res": {"Yes": 0.7773528603317406, "yes": 0.21918343434668577}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7681343701542758, "res": {"Yes": 0.7681343701542758, "yes": 0.18840339440321044}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8004377047486141, "res": {"Yes": 0.8004377047486141, "yes": 0.19471126695378252}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6571841898108377, "res": {"Yes": 0.6571841898108377, "yes": 0.3078597982902583}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8879379629261953, "res": {"Yes": 0.8879379629261953, "yes": 0.10792990395797987}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8222495592025583, "res": {"Yes": 0.8222495592025583, "yes": 0.17243341793617878}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8188873093642743, "res": {"Yes": 0.8188873093642743, "yes": 0.17353468257582536}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7435387768275696, "res": {"Yes": 0.7435387768275696, "yes": 0.24632836895429486}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7698867137179357, "res": {"Yes": 0.7698867137179357, "yes": 0.22062840896820224}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.73874194047489, "res": {"Yes": 0.73874194047489, "yes": 0.24829428580576057}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.720717381368783, "res": {"Yes": 0.720717381368783, "yes": 0.25982496141797845}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.47208516752606267, "res": {"Yes": 0.47208516752606267, "yes": 0.35718833045940523}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7638378886205822, "res": {"Yes": 0.7638378886205822, "yes": 0.1920834050456209}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8436670270010987, "res": {"Yes": 0.8436670270010987, "yes": 0.10741734616849043}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7917111130163152, "res": {"Yes": 0.7917111130163152, "yes": 0.15962339811201115}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5672689778489632, "res": {"Yes": 0.5672689778489632, "yes": 0.3442173120448098}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8862140111248412, "res": {"Yes": 0.8862140111248412, "yes": 0.11089083833345541}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9149910602252256, "res": {"Yes": 0.9149910602252256, "yes": 0.07808578101338104}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9174859844260561, "res": {"Yes": 0.9174859844260561, "yes": 0.07705384621748917}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9439823217233172, "res": {"Yes": 0.9439823217233172, "yes": 0.052264391011635575}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9377811994803874, "res": {"Yes": 0.9377811994803874, "yes": 0.057524732110190976}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9769959854559972, "res": {"Yes": 0.9769959854559972, "yes": 0.017503092489132052}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6622432787462473, "res": {"Yes": 0.6622432787462473, "yes": 0.32851085416669507}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49819379055064805, "res": {"Yes": 0.49819379055064805, "yes": 0.489491050558976}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47142769484702923, "res": {"Yes": 0.47142769484702923, "yes": 0.4030543107445833}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.620400867121125, "res": {"Yes": 0.620400867121125, "yes": 0.31723803114075844}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7476098933558465, "res": {"Yes": 0.7476098933558465, "yes": 0.24296302687397336}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9700978202022027, "res": {"Yes": 0.9700978202022027, " Yes": 0.01752830983974276}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9634607932152781, "res": {"Yes": 0.9634607932152781, "yes": 0.023330216805502846}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9629204153679168, "res": {"Yes": 0.9629204153679168, "yes": 0.024361221346997693}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6530273349599387, "res": {"Yes": 0.6530273349599387, "yes": 0.34028730618759384}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9569813506721426, "res": {"Yes": 0.9569813506721426, "yes": 0.03623695588839649}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9396958405146586, "res": {"Yes": 0.9396958405146586, "yes": 0.05127084561264669}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.925913164102881, "res": {"Yes": 0.925913164102881, "yes": 0.06842543272485863}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9345331866341123, "res": {"Yes": 0.9345331866341123, "yes": 0.056629263750025326}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.921714395206899, "res": {"Yes": 0.921714395206899, "yes": 0.07371707677838636}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4386115103301081, "res": {"Yes": 0.4386115103301081, "yes": 0.4189021829057976}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4299673944628972, "res": {"Yes": 0.4299673944628972, "yes": 0.40066171948761686}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49869766929611736, "res": {"Yes": 0.49869766929611736, "yes": 0.34602962022563255}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.355235510360254, "res": {"yes": 0.48067986105419686, "Yes": 0.355235510360254}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.2534482294206493, "res": {"yes": 0.6330169499653614, "Yes": 0.2534482294206493}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8952489213171433, "res": {"Yes": 0.8952489213171433, "yes": 0.09728327457426121}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9352666062162452, "res": {"Yes": 0.9352666062162452, "yes": 0.05974645024863806}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9414726544212912, "res": {"Yes": 0.9414726544212912, "yes": 0.05386185236335781}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8966102995371881, "res": {"Yes": 0.8966102995371881, "yes": 0.09428597762768291}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9019113458024309, "res": {"Yes": 0.9019113458024309, "yes": 0.08906317763631624}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9836697570642758, "res": {"Yes": 0.9836697570642758, "yes": 0.012458060946556064}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9575693666154542, "res": {"Yes": 0.9575693666154542, "yes": 0.03785699907792667}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9585371612826932, "res": {"Yes": 0.9585371612826932, "yes": 0.03629686881397493}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9539641632184103, "res": {"Yes": 0.9539641632184103, "yes": 0.04044038761237739}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.992002084885877, "res": {"Yes": 0.992002084885877, "yes": 0.0038340112503477035}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.64043672662236, "res": {"Yes": 0.64043672662236, "yes": 0.34447374614847703}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8810719392704789, "res": {"Yes": 0.8810719392704789, "yes": 0.11452325227128456}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9646439687696426, "res": {"Yes": 0.9646439687696426, "yes": 0.03377203114370453}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881680911127343, "res": {"Yes": 0.9881680911127343, "yes": 0.005826921282546837}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930339647923621, "res": {"Yes": 0.9930339647923621, "yes": 0.004886367959686871}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9203836269852121, "res": {"Yes": 0.9203836269852121, "yes": 0.07368868642874372}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9235810363793454, "res": {"Yes": 0.9235810363793454, "yes": 0.07085074483901514}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9741202451305904, "res": {"Yes": 0.9741202451305904, "yes": 0.01945969257771275}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9565876195103167, "res": {"Yes": 0.9565876195103167, "yes": 0.03767693030809007}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9519732874363912, "res": {"Yes": 0.9519732874363912, "yes": 0.04233241814700607}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9289161245329572, "res": {"Yes": 0.9289161245329572, "yes": 0.07055368990969935}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7818065715439013, "res": {"Yes": 0.7818065715439013, "yes": 0.1950427526675437}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9118965658298379, "res": {"Yes": 0.9118965658298379, "yes": 0.07811012414500289}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9275435327708437, "res": {"Yes": 0.9275435327708437, "yes": 0.07183920653073374}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7401723311232142, "res": {"Yes": 0.7401723311232142, "yes": 0.14420587686548422}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8313181046185517, "res": {"Yes": 0.8313181046185517, "yes": 0.15227799714557405}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8236970536591446, "res": {"Yes": 0.8236970536591446, "yes": 0.16379102524472353}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8783038189085794, "res": {"Yes": 0.8783038189085794, "yes": 0.11259187769790084}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7013905811562697, "res": {"Yes": 0.7013905811562697, "yes": 0.2856569110459042}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8266178356616823, "res": {"Yes": 0.8266178356616823, "yes": 0.1631824302548152}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9069141101393672, "res": {"Yes": 0.9069141101393672, "yes": 0.09014123147581991}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9148698173033797, "res": {"Yes": 0.9148698173033797, "yes": 0.08303227428164862}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9066753238879158, "res": {"Yes": 0.9066753238879158, "yes": 0.0883701531853058}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9066324028940309, "res": {"Yes": 0.9066324028940309, "yes": 0.08668800113383159}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8544568799644416, "res": {"Yes": 0.8544568799644416, "yes": 0.13968659122042162}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7958865964965317, "res": {"Yes": 0.7958865964965317, "yes": 0.19223590293902562}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8092491876556603, "res": {"Yes": 0.8092491876556603, "yes": 0.17888767215159956}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8496312305908867, "res": {"Yes": 0.8496312305908867, "yes": 0.13557833332250013}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7891499227715839, "res": {"Yes": 0.7891499227715839, "yes": 0.19974424615143876}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.970779695085452, "res": {"Yes": 0.970779695085452, "yes": 0.01898586545661548}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9350595751362614, "res": {"Yes": 0.9350595751362614, "yes": 0.05876654785506819}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9170358478131133, "res": {"Yes": 0.9170358478131133, "yes": 0.07766770850088736}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.887795886461209, "res": {"Yes": 0.887795886461209, "yes": 0.10806990421404193}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8578705800277815, "res": {"Yes": 0.8578705800277815, "yes": 0.13660333743085773}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9218598532147977, "res": {"Yes": 0.9218598532147977, "yes": 0.07166758683835911}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.62762003213385, "res": {"Yes": 0.62762003213385, "yes": 0.36872733781114075}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8866887590172824, "res": {"Yes": 0.8866887590172824, "yes": 0.10917858969049117}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8066899058576833, "res": {"Yes": 0.8066899058576833, "yes": 0.18691933851931194}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7897943925422547, "res": {"Yes": 0.7897943925422547, "yes": 0.20385310867848805}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5520283363804683, "res": {"Yes": 0.5520283363804683, "yes": 0.44294878283004047}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8270225393307762, "res": {"Yes": 0.8270225393307762, "yes": 0.16425245089394636}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9397900591427295, "res": {"Yes": 0.9397900591427295, "yes": 0.041902600061733235}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8168256905051657, "res": {"Yes": 0.8168256905051657, "yes": 0.17656741868331677}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8941987885227947, "res": {"Yes": 0.8941987885227947, "yes": 0.10042089257464226}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6865024739953853, "res": {"Yes": 0.6865024739953853, "yes": 0.30750931484976685}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8814580214522001, "res": {"Yes": 0.8814580214522001, "yes": 0.11291510791437026}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8497518188118699, "res": {"Yes": 0.8497518188118699, "yes": 0.14600062156329796}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7474809940953042, "res": {"Yes": 0.7474809940953042, "yes": 0.24755200446371348}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7136606426749799, "res": {"Yes": 0.7136606426749799, "yes": 0.2790532653917995}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7847038370736638, "res": {"Yes": 0.7847038370736638, "yes": 0.20826357791262337}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7117881385967527, "res": {"Yes": 0.7117881385967527, "yes": 0.2062269742499083}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7629111291793693, "res": {"Yes": 0.7629111291793693, "yes": 0.12433960586034987}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8523984901943793, "res": {"Yes": 0.8523984901943793, "yes": 0.11629630398164044}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932338504697549, "res": {"Yes": 0.9932338504697549, "yes": 0.005328859719478466}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8692535275055115, "res": {"Yes": 0.8692535275055115, "yes": 0.07390182912360406}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.46498706080860075, "res": {"yes": 0.47884185148582925, "Yes": 0.46498706080860075}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5505637366264943, "res": {"Yes": 0.5505637366264943, "yes": 0.43622679450153273}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5908771802552122, "res": {"Yes": 0.5908771802552122, "yes": 0.3402421870822562}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6225026273559828, "res": {"Yes": 0.6225026273559828, "yes": 0.3327536009424605}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6539923204663981, "res": {"Yes": 0.6539923204663981, "yes": 0.28410593494564934}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9916883682107162, "res": {"Yes": 0.9916883682107162, "yes": 0.006498190665392725}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6955584853976782, "res": {"Yes": 0.6955584853976782, "yes": 0.2457058967368533}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932290290263769, "res": {"Yes": 0.9932290290263769, "yes": 0.004226336655674018}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8518276426302777, "res": {"Yes": 0.8518276426302777, "yes": 0.1441240920719885}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9900741833342049, "res": {"Yes": 0.9900741833342049, "yes": 0.006707167153028321}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8523732766199499, "res": {"Yes": 0.8523732766199499, "yes": 0.13953069026750378}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9082827864920179, "res": {"Yes": 0.9082827864920179, "yes": 0.08589502223853862}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9042271337506943, "res": {"Yes": 0.9042271337506943, "yes": 0.08919228804753349}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7677262152133226, "res": {"Yes": 0.7677262152133226, "yes": 0.2258256584945174}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654643459130645, "res": {"Yes": 0.8654643459130645, "yes": 0.1297282631084476}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8799957157391277, "res": {"Yes": 0.8799957157391277, "yes": 0.10870907518255048}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8961521501393346, "res": {"Yes": 0.8961521501393346, "yes": 0.08955045227121561}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8541134914958091, "res": {"Yes": 0.8541134914958091, "yes": 0.12922169290319982}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9454546878882701, "res": {"Yes": 0.9454546878882701, "yes": 0.04724298507903565}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6593223327567339, "res": {"Yes": 0.6593223327567339, "yes": 0.3353864600388105}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8560000880313826, "res": {"Yes": 0.8560000880313826, "yes": 0.13781768368603994}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9810452765656587, "res": {"Yes": 0.9810452765656587, "yes": 0.011790218876695046}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.762446970350799, "res": {"Yes": 0.762446970350799, "yes": 0.23074340193634407}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8400180745960695, "res": {"Yes": 0.8400180745960695, "yes": 0.15338783433594919}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.920384538165454, "res": {"Yes": 0.920384538165454, "yes": 0.07166044910217517}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9187315870151275, "res": {"Yes": 0.9187315870151275, "yes": 0.07599656781838275}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8781039484875514, "res": {"Yes": 0.8781039484875514, "yes": 0.11614468274750171}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9017367481583606, "res": {"Yes": 0.9017367481583606, "yes": 0.09259700326524975}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8915659303595771, "res": {"Yes": 0.8915659303595771, "yes": 0.10165891905124419}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8824316178786532, "res": {"Yes": 0.8824316178786532, "yes": 0.10958327680569194}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9097352189477352, "res": {"Yes": 0.9097352189477352, "yes": 0.08056791384355096}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9773182245059007, "res": {"Yes": 0.9773182245059007, "yes": 0.01648276977090309}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7625118192232866, "res": {"Yes": 0.7625118192232866, "yes": 0.22324083783511173}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.835686304639516, "res": {"Yes": 0.835686304639516, "yes": 0.14802485205860402}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9447381876308076, "res": {"Yes": 0.9447381876308076, "yes": 0.048885521998014755}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9426432309767693, "res": {"Yes": 0.9426432309767693, "yes": 0.04862513989644272}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9360420250076561, "res": {"Yes": 0.9360420250076561, "yes": 0.0566148307902543}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9334525103293408, "res": {"Yes": 0.9334525103293408, "yes": 0.051197457826271636}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9608763580179357, "res": {"Yes": 0.9608763580179357, "yes": 0.032867278190924804}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9413499988811267, "res": {"Yes": 0.9413499988811267, "yes": 0.04889094859212694}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7096009068599896, "res": {"Yes": 0.7096009068599896, "yes": 0.2494813290643854}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5744569208330533, "res": {"Yes": 0.5744569208330533, "yes": 0.3374054689171512}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8290132175544012, "res": {"Yes": 0.8290132175544012, "yes": 0.1642743636324428}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8332607504894406, "res": {"Yes": 0.8332607504894406, "yes": 0.16225845724172355}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5437526951466428, "res": {"Yes": 0.5437526951466428, "yes": 0.4407216244165598}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.969288027749067, "res": {"Yes": 0.969288027749067, "yes": 0.028285697239133302}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8710392499992491, "res": {"Yes": 0.8710392499992491, "yes": 0.12343871264197044}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8592500027944475, "res": {"Yes": 0.8592500027944475, "yes": 0.13750215352474685}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9849695943171992, "res": {"Yes": 0.9849695943171992, "yes": 0.013468705441113324}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.762166822569503, "res": {"Yes": 0.762166822569503, "yes": 0.2332645410843905}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9855269817720734, "res": {"Yes": 0.9855269817720734, " Yes": 0.00675646604520088}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.905850733340329, "res": {"Yes": 0.905850733340329, "yes": 0.09100037969513325}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.928964674673991, "res": {"Yes": 0.928964674673991, "yes": 0.06993110750880661}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8191696776248948, "res": {"Yes": 0.8191696776248948, "yes": 0.1676666830226359}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9788437821292189, "res": {"Yes": 0.9788437821292189, "yes": 0.01938522695818005}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9090995998636545, "res": {"Yes": 0.9090995998636545, "yes": 0.08604312972600847}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8370658313770932, "res": {"Yes": 0.8370658313770932, "yes": 0.14812619494293217}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9149592227502056, "res": {"Yes": 0.9149592227502056, "yes": 0.07597183496104876}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8713376407507574, "res": {"Yes": 0.8713376407507574, "yes": 0.12388437599918767}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6378689654746198, "res": {"Yes": 0.6378689654746198, "yes": 0.3477820460055707}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9905104724568796, "res": {"Yes": 0.9905104724568796, "yes": 0.006655118640641349}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8805869193657742, "res": {"Yes": 0.8805869193657742, "yes": 0.11665306566232296}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9868383372605034, "res": {"Yes": 0.9868383372605034, "yes": 0.010799839565592844}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854751128809626, "res": {"Yes": 0.9854751128809626, "yes": 0.01257817009169352}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.980368822136704, "res": {"Yes": 0.980368822136704, "yes": 0.01666780274241996}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4736194631872058, "res": {"Yes": 0.4736194631872058, "yes": 0.4007193387856074}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3987068515568282, "res": {"yes": 0.5495164331193004, "Yes": 0.3987068515568282}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6920071022846581, "res": {"Yes": 0.6920071022846581, "yes": 0.2510920183656074}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5013034864095897, "res": {"Yes": 0.5013034864095897, "yes": 0.4479779081268016}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7266134852362499, "res": {"Yes": 0.7266134852362499, "yes": 0.21304250056666021}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7898909745054757, "res": {"Yes": 0.7898909745054757, "yes": 0.19934256541124076}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6393420617038351, "res": {"Yes": 0.6393420617038351, "yes": 0.342969519567346}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6153726005983366, "res": {"Yes": 0.6153726005983366, "yes": 0.37241602604973006}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.780692213224528, "res": {"Yes": 0.780692213224528, "yes": 0.21310009373792987}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5394175676144201, "res": {"Yes": 0.5394175676144201, "yes": 0.4557560184114362}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.41026044058999783, "res": {"yes": 0.5748848609856111, "Yes": 0.41026044058999783}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5910372114628729, "res": {"Yes": 0.5910372114628729, "yes": 0.3985915143760169}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44200490485897637, "res": {"yes": 0.5527607009260229, "Yes": 0.44200490485897637}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7434888796145765, "res": {"Yes": 0.7434888796145765, "yes": 0.2505619960139356}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6913090049538226, "res": {"Yes": 0.6913090049538226, "yes": 0.28413477316156105}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8820949550352593, "res": {"Yes": 0.8820949550352593, "yes": 0.11185909277307045}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6236776691019787, "res": {"Yes": 0.6236776691019787, "yes": 0.3701971934662207}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8321431314201414, "res": {"Yes": 0.8321431314201414, "yes": 0.16179993113109792}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7241583246845461, "res": {"Yes": 0.7241583246845461, "yes": 0.26853693875210105}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6917914372840125, "res": {"Yes": 0.6917914372840125, "yes": 0.29973216093193605}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9790480421298814, "res": {"Yes": 0.9790480421298814, "yes": 0.013622266260434706}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7951924803379017, "res": {"Yes": 0.7951924803379017, "yes": 0.20317583349522195}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8180750487088619, "res": {"Yes": 0.8180750487088619, "yes": 0.17901091452505044}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7425837797429465, "res": {"Yes": 0.7425837797429465, "yes": 0.2138924629157722}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888045388780066, "res": {"Yes": 0.9888045388780066, "yes": 0.007337026701227891}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9433616215063656, "res": {"Yes": 0.9433616215063656, "yes": 0.055397766606700684}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9798350694113483, "res": {"Yes": 0.9798350694113483, "yes": 0.017100458550745344}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8150858531507619, "res": {"Yes": 0.8150858531507619, "yes": 0.1815094791353406}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9045998932014777, "res": {"Yes": 0.9045998932014777, "yes": 0.09361473337812136}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8791795816019846, "res": {"Yes": 0.8791795816019846, "yes": 0.11898220914862728}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7661808044139411, "res": {"Yes": 0.7661808044139411, "yes": 0.22776488231850248}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7390860083717237, "res": {"Yes": 0.7390860083717237, "yes": 0.25470565363781267}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7168272728388172, "res": {"Yes": 0.7168272728388172, "yes": 0.2784398559579218}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7762618501168158, "res": {"Yes": 0.7762618501168158, "yes": 0.21178814923513423}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7306875783407372, "res": {"Yes": 0.7306875783407372, "yes": 0.26366534830567295}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7863651790962533, "res": {"Yes": 0.7863651790962533, "yes": 0.20675949936316887}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7864408232008754, "res": {"Yes": 0.7864408232008754, "yes": 0.2102729774026029}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8100136496806858, "res": {"Yes": 0.8100136496806858, "yes": 0.18433907837910338}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8320585400492534, "res": {"Yes": 0.8320585400492534, "yes": 0.16530528541234554}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8229143040769786, "res": {"Yes": 0.8229143040769786, "yes": 0.17077421074197421}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8690658498546641, "res": {"Yes": 0.8690658498546641, "yes": 0.11879170472959102}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8855461461651833, "res": {"Yes": 0.8855461461651833, "yes": 0.10795362955821777}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7583390180224139, "res": {"Yes": 0.7583390180224139, "yes": 0.2295332683170672}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9214722004896184, "res": {"Yes": 0.9214722004896184, "yes": 0.06513980322108923}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8208969869970861, "res": {"Yes": 0.8208969869970861, "yes": 0.16822320166884433}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9369513116938732, "res": {"Yes": 0.9369513116938732, "yes": 0.06009887238078116}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8637770305292904, "res": {"Yes": 0.8637770305292904, "yes": 0.13479711268285235}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9094950441697939, "res": {"Yes": 0.9094950441697939, "yes": 0.08924784538128133}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921706869905023, "res": {"Yes": 0.9921706869905023, "yes": 0.006402338274192143}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654401565226446, "res": {"Yes": 0.8654401565226446, "yes": 0.13356533768603332}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8722028552154547, "res": {"Yes": 0.8722028552154547, "yes": 0.12009860083068379}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8977977129779923, "res": {"Yes": 0.8977977129779923, "yes": 0.09187575534874444}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8608249201545426, "res": {"Yes": 0.8608249201545426, "yes": 0.13127857328300824}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9076351484675631, "res": {"Yes": 0.9076351484675631, "yes": 0.08684034162680443}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9584376051152645, "res": {"Yes": 0.9584376051152645, "yes": 0.033270162655822395}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8641920454941445, "res": {"Yes": 0.8641920454941445, "yes": 0.12800089793115205}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7175522081667942, "res": {"Yes": 0.7175522081667942, "yes": 0.2779760277846704}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8813768693480595, "res": {"Yes": 0.8813768693480595, "yes": 0.10860164763547649}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8611502959776345, "res": {"Yes": 0.8611502959776345, "yes": 0.13336439302271472}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6152159097641591, "res": {"Yes": 0.6152159097641591, "yes": 0.37868136238760813}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9072064919542749, "res": {"Yes": 0.9072064919542749, "yes": 0.08482477676734586}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9421403631866442, "res": {"Yes": 0.9421403631866442, "yes": 0.05372535117211538}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8830041663377595, "res": {"Yes": 0.8830041663377595, "yes": 0.10894274608808896}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8632399800668168, "res": {"Yes": 0.8632399800668168, "yes": 0.1283684030724092}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8869107427297619, "res": {"Yes": 0.8869107427297619, "yes": 0.09721667779317314}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9407411141959219, "res": {"Yes": 0.9407411141959219, "yes": 0.05567997566539388}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7408710429039423, "res": {"Yes": 0.7408710429039423, "yes": 0.24679915766092064}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7898282043639364, "res": {"Yes": 0.7898282043639364, "yes": 0.20579566910438502}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8431111324110081, "res": {"Yes": 0.8431111324110081, "yes": 0.15232266683262624}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6509108690817783, "res": {"Yes": 0.6509108690817783, "yes": 0.34686521634872597}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9158487573535012, "res": {"Yes": 0.9158487573535012, "yes": 0.05843641727612863}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923513241370223, "res": {"Yes": 0.9923513241370223, "yes": 0.00671473442033358}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8049642117542274, "res": {"Yes": 0.8049642117542274, "yes": 0.17650654873193744}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8843827773610766, "res": {"Yes": 0.8843827773610766, "yes": 0.10453059241892286}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.911683885289949, "res": {"Yes": 0.911683885289949, "yes": 0.0790086451676908}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8187241868836727, "res": {"Yes": 0.8187241868836727, "yes": 0.17065588814594132}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9633464988604608, "res": {"Yes": 0.9633464988604608, "yes": 0.027884993562486936}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49160068887477315, "res": {"Yes": 0.49160068887477315, "yes": 0.39616327979288507}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7693769711746806, "res": {"Yes": 0.7693769711746806, "yes": 0.21625472147458677}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8589440096260105, "res": {"Yes": 0.8589440096260105, "yes": 0.13423503807968357}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7637815041888193, "res": {"Yes": 0.7637815041888193, "yes": 0.2089277459390921}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9016869330877381, "res": {"Yes": 0.9016869330877381, "yes": 0.08918080082062628}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9084895624360182, "res": {"Yes": 0.9084895624360182, "yes": 0.08537892401833512}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9121153971040111, "res": {"Yes": 0.9121153971040111, "yes": 0.07479391107916829}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8880705987973554, "res": {"Yes": 0.8880705987973554, "yes": 0.10267670638432896}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8918792103297684, "res": {"Yes": 0.8918792103297684, "yes": 0.09859816777005563}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9460911646704472, "res": {"Yes": 0.9460911646704472, "yes": 0.04434318260857888}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9028676710422666, "res": {"Yes": 0.9028676710422666, "yes": 0.08806780938857578}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9163757875681898, "res": {"Yes": 0.9163757875681898, "yes": 0.07379078645150723}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9102372856499801, "res": {"Yes": 0.9102372856499801, "yes": 0.0793748588530622}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8269866638711644, "res": {"Yes": 0.8269866638711644, "yes": 0.17035508244680048}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6936788761791212, "res": {"Yes": 0.6936788761791212, "yes": 0.3029088935619967}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6136462235718543, "res": {"Yes": 0.6136462235718543, "yes": 0.3802059877895032}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7651533941709151, "res": {"Yes": 0.7651533941709151, "yes": 0.22541489433273812}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6555658651816021, "res": {"Yes": 0.6555658651816021, "yes": 0.33954761209665096}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9795705281038296, "res": {"Yes": 0.9795705281038296, "yes": 0.016913191489904816}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9803243673400266, "res": {"Yes": 0.9803243673400266, "yes": 0.01659717630827093}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9823715467573415, "res": {"Yes": 0.9823715467573415, "yes": 0.015787441449094587}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987319993387537, "res": {"Yes": 0.987319993387537, "yes": 0.010734972600572667}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9028919675382022, "res": {"Yes": 0.9028919675382022, "yes": 0.09357060443554933}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7434161476552256, "res": {"Yes": 0.7434161476552256, "yes": 0.2167890539178682}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6947868090798498, "res": {"Yes": 0.6947868090798498, "yes": 0.29955594027085636}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7325017859037721, "res": {"Yes": 0.7325017859037721, "yes": 0.260929845233332}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9765214819330525, "res": {"Yes": 0.9765214819330525, "yes": 0.014579318435946552}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838454719138783, "res": {"Yes": 0.9838454719138783, "yes": 0.011380174687572124}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.857919977637923, "res": {"Yes": 0.857919977637923, "yes": 0.1373845295202508}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8440711981048299, "res": {"Yes": 0.8440711981048299, "yes": 0.15168385886039792}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8637144262389246, "res": {"Yes": 0.8637144262389246, "yes": 0.13284695867004645}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.875159345553842, "res": {"Yes": 0.875159345553842, "yes": 0.12158977501904672}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7768072957437618, "res": {"Yes": 0.7768072957437618, "yes": 0.21802132497303134}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7371175879958504, "res": {"Yes": 0.7371175879958504, "yes": 0.2531325283332875}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7891702831022386, "res": {"Yes": 0.7891702831022386, "yes": 0.1996684575918131}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7457565593380601, "res": {"Yes": 0.7457565593380601, "yes": 0.24661489219256905}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8297543382613496, "res": {"Yes": 0.8297543382613496, "yes": 0.16341964129767325}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.690454936770208, "res": {"Yes": 0.690454936770208, "yes": 0.3004857942003581}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9664740984696485, "res": {"Yes": 0.9664740984696485, "yes": 0.030741353846207735}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9709261064985842, "res": {"Yes": 0.9709261064985842, "yes": 0.026181309517089798}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9611405824901252, "res": {"Yes": 0.9611405824901252, "yes": 0.03470788792981536}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43775972157947807, "res": {"yes": 0.4686195049008184, "Yes": 0.43775972157947807}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8916130241157538, "res": {"Yes": 0.8916130241157538, "yes": 0.06314026492666946}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9994169934144224, "res": {"Yes": 0.9994169934144224, " Yes": 0.00032211691121802476}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9478891253015654, "res": {"Yes": 0.9478891253015654, "yes": 0.04399016028320218}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9437819044033572, "res": {"Yes": 0.9437819044033572, "yes": 0.042648916918898196}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.885975341525216, "res": {"Yes": 0.885975341525216, "yes": 0.07037049561791424}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.862309149934418, "res": {"Yes": 0.862309149934418, "yes": 0.09674933698151161}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7505050493124052, "res": {"Yes": 0.7505050493124052, "yes": 0.24605871319571385}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5300039763059798, "res": {"Yes": 0.5300039763059798, "yes": 0.469301434268294}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7206066156198325, "res": {"Yes": 0.7206066156198325, "yes": 0.2766368067759551}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7199674761544718, "res": {"Yes": 0.7199674761544718, "yes": 0.2750249659949863}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5449028811081085, "res": {"Yes": 0.5449028811081085, "yes": 0.4478561463628652}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8454114221492842, "res": {"Yes": 0.8454114221492842, "yes": 0.15026432034380813}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9050490480594756, "res": {"Yes": 0.9050490480594756, "yes": 0.0917246430749435}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9766116434408886, "res": {"Yes": 0.9766116434408886, "yes": 0.019658485535747554}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9542158073128353, "res": {"Yes": 0.9542158073128353, "yes": 0.04000045699733858}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9496432939029621, "res": {"Yes": 0.9496432939029621, "yes": 0.04500541784065002}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7682101119371815, "res": {"Yes": 0.7682101119371815, "yes": 0.21833185566160765}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7044601599913106, "res": {"Yes": 0.7044601599913106, "yes": 0.28269075989869935}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.754129556603837, "res": {"Yes": 0.754129556603837, "yes": 0.23373857243196292}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.448487979232348, "res": {"yes": 0.48568091218558523, "Yes": 0.448487979232348}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7475290810943561, "res": {"Yes": 0.7475290810943561, "yes": 0.2349689267914555}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.860528210518347, "res": {"Yes": 0.860528210518347, "yes": 0.12309702665287142}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8865691838012345, "res": {"Yes": 0.8865691838012345, "yes": 0.10490108714516337}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8376036012068, "res": {"Yes": 0.8376036012068, "yes": 0.14100874066406757}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7483974064858141, "res": {"Yes": 0.7483974064858141, "yes": 0.22846588033367143}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8671146355567257, "res": {"Yes": 0.8671146355567257, "yes": 0.12159450495329156}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7841341841689957, "res": {"Yes": 0.7841341841689957, "yes": 0.1973494033868438}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6539594385591831, "res": {"Yes": 0.6539594385591831, "yes": 0.3202056927005346}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6338431833585674, "res": {"Yes": 0.6338431833585674, "yes": 0.3447134799153941}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6311025120303547, "res": {"Yes": 0.6311025120303547, "yes": 0.3375462300919388}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7207117453808976, "res": {"Yes": 0.7207117453808976, "yes": 0.25389151741163624}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8582573008585539, "res": {"Yes": 0.8582573008585539, "yes": 0.1293238959691576}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7786825389378251, "res": {"Yes": 0.7786825389378251, "yes": 0.21224281626364372}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7099044680797929, "res": {"Yes": 0.7099044680797929, "yes": 0.2854484575956071}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8832996548338543, "res": {"Yes": 0.8832996548338543, "yes": 0.10831794950211264}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7704311544489449, "res": {"Yes": 0.7704311544489449, "yes": 0.21699716283939138}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9836615857535436, "res": {"Yes": 0.9836615857535436, " Yes": 0.010369521505440805}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894393503964313, "res": {"Yes": 0.9894393503964313, "yes": 0.004795336959590142}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9766946482150494, "res": {"Yes": 0.9766946482150494, "yes": 0.012442515518688422}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9866491321101885, "res": {"Yes": 0.9866491321101885, "yes": 0.01032264761491155}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8935521232911768, "res": {"Yes": 0.8935521232911768, "yes": 0.1022314007868538}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7878352299789413, "res": {"Yes": 0.7878352299789413, "yes": 0.20444900477488254}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9584987122527008, "res": {"Yes": 0.9584987122527008, "yes": 0.030954589601669233}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732777832694779, "res": {"Yes": 0.9732777832694779, "yes": 0.02250889532777593}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9689432038181061, "res": {"Yes": 0.9689432038181061, "yes": 0.026662209251946494}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7483677031822167, "res": {"Yes": 0.7483677031822167, "yes": 0.24828780540947054}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8207213338374272, "res": {"Yes": 0.8207213338374272, "yes": 0.16711534773371292}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8975004295612864, "res": {"Yes": 0.8975004295612864, "yes": 0.09762760377374799}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8505179943432144, "res": {"Yes": 0.8505179943432144, "yes": 0.1423512335048335}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8498586563261964, "res": {"Yes": 0.8498586563261964, "yes": 0.13943076627804282}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.846586887025527, "res": {"Yes": 0.846586887025527, "yes": 0.1471501036381722}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8304536261156306, "res": {"Yes": 0.8304536261156306, "yes": 0.15381990637798224}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8781927468483919, "res": {"Yes": 0.8781927468483919, "yes": 0.11666344824718508}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5519167275348867, "res": {"Yes": 0.5519167275348867, "yes": 0.44415943196580576}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7520757114353491, "res": {"Yes": 0.7520757114353491, "yes": 0.24430041548586875}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5576780413219709, "res": {"Yes": 0.5576780413219709, "yes": 0.4366624796887785}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8415908661636637, "res": {"Yes": 0.8415908661636637, "yes": 0.1546320894511874}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9576112276752873, "res": {"Yes": 0.9576112276752873, "yes": 0.030273043981604054}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9753521048477511, "res": {"Yes": 0.9753521048477511, "yes": 0.02082438340022975}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9802596376788324, "res": {"Yes": 0.9802596376788324, "yes": 0.012141062215746257}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9842183048633866, "res": {"Yes": 0.9842183048633866, "yes": 0.011237228945603719}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7461949021245823, "res": {"Yes": 0.7461949021245823, "yes": 0.24991870350108042}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6417037566676069, "res": {"Yes": 0.6417037566676069, "yes": 0.35386913830383027}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7953618266543272, "res": {"Yes": 0.7953618266543272, "yes": 0.2018456061130753}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8070726822675952, "res": {"Yes": 0.8070726822675952, "yes": 0.18969176758584597}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.756964092968951, "res": {"Yes": 0.756964092968951, "yes": 0.23967932378057266}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6953149156501766, "res": {"Yes": 0.6953149156501766, "yes": 0.29955501164888093}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9763565691935391, "res": {"Yes": 0.9763565691935391, "yes": 0.02003166114284509}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7929140096389959, "res": {"Yes": 0.7929140096389959, "yes": 0.20088999686773903}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9828756788429598, "res": {"Yes": 0.9828756788429598, "yes": 0.013016792816539861}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9665575068503347, "res": {"Yes": 0.9665575068503347, "yes": 0.028874327513128774}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9166081916056639, "res": {"Yes": 0.9166081916056639, "yes": 0.07971809341038436}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8384111972550091, "res": {"Yes": 0.8384111972550091, "yes": 0.15643362192961496}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8681387299165295, "res": {"Yes": 0.8681387299165295, "yes": 0.12750528044325854}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8702036885206912, "res": {"Yes": 0.8702036885206912, "yes": 0.12336287051785282}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8533800718306578, "res": {"Yes": 0.8533800718306578, "yes": 0.14009078531509683}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8857935757526302, "res": {"Yes": 0.8857935757526302, "yes": 0.10335082606479937}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8252035547918719, "res": {"Yes": 0.8252035547918719, "yes": 0.1622164539647025}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8403907808720565, "res": {"Yes": 0.8403907808720565, "yes": 0.1509491883182635}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9266889912303317, "res": {"Yes": 0.9266889912303317, "yes": 0.06443282955915987}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930963935369101, "res": {"Yes": 0.9930963935369101, "yes": 0.004431799365850101}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8508047437813964, "res": {"Yes": 0.8508047437813964, "yes": 0.13526157265489097}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7086084016016793, "res": {"Yes": 0.7086084016016793, "yes": 0.2824978892796539}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7180928720784542, "res": {"Yes": 0.7180928720784542, "yes": 0.2716925996855735}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3744696577505526, "res": {"yes": 0.6160639166721424, "Yes": 0.3744696577505526}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5871659443751653, "res": {"Yes": 0.5871659443751653, "yes": 0.39617376241195285}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7894261183257961, "res": {"Yes": 0.7894261183257961, "yes": 0.20600226811215266}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9681065572114481, "res": {"Yes": 0.9681065572114481, "yes": 0.025709127358388373}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7823910104802644, "res": {"Yes": 0.7823910104802644, "yes": 0.21413279907512217}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9728686089157697, "res": {"Yes": 0.9728686089157697, "yes": 0.02039354193632816}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9671346346560602, "res": {"Yes": 0.9671346346560602, "yes": 0.024086665267972103}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9264423487160979, "res": {"Yes": 0.9264423487160979, "yes": 0.07006307999026808}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9014061860820728, "res": {"Yes": 0.9014061860820728, "yes": 0.09369043275689332}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.922383710585006, "res": {"Yes": 0.922383710585006, "yes": 0.07163183363369321}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8917999436282187, "res": {"Yes": 0.8917999436282187, "yes": 0.1017404008144236}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9326367904974052, "res": {"Yes": 0.9326367904974052, "yes": 0.061763018486645505}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7844954944796376, "res": {"Yes": 0.7844954944796376, "yes": 0.2124653420828465}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.959656858959986, "res": {"Yes": 0.959656858959986, "yes": 0.03603426815840923}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7408243917231985, "res": {"Yes": 0.7408243917231985, "yes": 0.2559227085029291}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5003947960619057, "res": {"Yes": 0.5003947960619057, "yes": 0.49567512340202957}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7449692251345702, "res": {"Yes": 0.7449692251345702, "yes": 0.25162706208090585}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8371264873647747, "res": {"Yes": 0.8371264873647747, "yes": 0.14309557719361307}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8776951984535277, "res": {"Yes": 0.8776951984535277, "yes": 0.10749997112705396}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7749449999140237, "res": {"Yes": 0.7749449999140237, "yes": 0.19247679076070456}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8700976432591705, "res": {"Yes": 0.8700976432591705, "yes": 0.11203830020814343}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.866654449908023, "res": {"Yes": 0.866654449908023, "yes": 0.11193899987865612}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8750146586692426, "res": {"Yes": 0.8750146586692426, "yes": 0.12130956646301284}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9005623657162208, "res": {"Yes": 0.9005623657162208, "yes": 0.09705346177381807}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861869074851672, "res": {"Yes": 0.9861869074851672, "yes": 0.010891438032724148}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989584678541467, "res": {"Yes": 0.989584678541467, "yes": 0.008684898206222112}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826775392859243, "res": {"Yes": 0.9826775392859243, "yes": 0.008757478131609528}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7465286676456028, "res": {"Yes": 0.7465286676456028, "yes": 0.2474916338923152}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9724870595046701, "res": {"Yes": 0.9724870595046701, "yes": 0.018856952503752553}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7099303587678577, "res": {"Yes": 0.7099303587678577, "yes": 0.2815673516957724}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7205614061790035, "res": {"Yes": 0.7205614061790035, "yes": 0.2720310404216493}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7559814349867219, "res": {"Yes": 0.7559814349867219, "yes": 0.23665551936472393}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9439328168125659, "res": {"Yes": 0.9439328168125659, "yes": 0.05218071483299042}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9354285859711415, "res": {"Yes": 0.9354285859711415, "yes": 0.0566755260931475}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8134341924016736, "res": {"Yes": 0.8134341924016736, "yes": 0.17832261952301487}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.903447001295688, "res": {"Yes": 0.903447001295688, "yes": 0.08354660993902646}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9614209390607374, "res": {"Yes": 0.9614209390607374, "yes": 0.037757818958726236}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7658388040223328, "res": {"Yes": 0.7658388040223328, "yes": 0.21804766353990546}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9053866363767684, "res": {"Yes": 0.9053866363767684, "yes": 0.09020018508279291}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6784397109700343, "res": {"Yes": 0.6784397109700343, "yes": 0.3112881401736351}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9293917770130494, "res": {"Yes": 0.9293917770130494, "yes": 0.05953539912868109}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7665260236305179, "res": {"Yes": 0.7665260236305179, "yes": 0.22542400127842824}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9718913632225045, "res": {"Yes": 0.9718913632225045, "yes": 0.016383459830473367}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9580031786600763, "res": {"Yes": 0.9580031786600763, "yes": 0.03775431897113568}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9870891523816975, "res": {"Yes": 0.9870891523816975, "yes": 0.010452116345243595}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9738672590208548, "res": {"Yes": 0.9738672590208548, "yes": 0.02315331296614955}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7274341787315973, "res": {"Yes": 0.7274341787315973, "yes": 0.2663858386365667}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.2677429501427462, "res": {"yes": 0.6450915979136032, "Yes": 0.2677429501427462}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.48624897112014864, "res": {"Yes": 0.48624897112014864, "yes": 0.3968080565389272}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2702667891170622, "res": {"yes": 0.7095554725685177, "Yes": 0.2702667891170622}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2734389772309991, "res": {"yes": 0.6595912724262393, "Yes": 0.2734389772309991}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.26624775338092976, "res": {"yes": 0.6596429336377224, "Yes": 0.26624775338092976}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.47171821678873116, "res": {"Yes": 0.47171821678873116, "yes": 0.4480514990218452}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6893517884274873, "res": {"Yes": 0.6893517884274873, "yes": 0.2613279884156837}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5228577502303329, "res": {"Yes": 0.5228577502303329, "yes": 0.4395888591177694}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7373139675335701, "res": {"Yes": 0.7373139675335701, "yes": 0.22229827896272666}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5154079479229812, "res": {"Yes": 0.5154079479229812, "yes": 0.17622128016251487}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8248135155139725, "res": {"Yes": 0.8248135155139725, "yes": 0.17040007916562985}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9870518638191175, "res": {"Yes": 0.9870518638191175, "yes": 0.010080685628012951}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8204849426930219, "res": {"Yes": 0.8204849426930219, "yes": 0.1531989922040238}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9895042468832654, "res": {"Yes": 0.9895042468832654, "yes": 0.009177742680108405}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8735753857857381, "res": {"Yes": 0.8735753857857381, "yes": 0.12115912678159356}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5399466989195947, "res": {"Yes": 0.5399466989195947, "yes": 0.443363430674194}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7476720522262885, "res": {"Yes": 0.7476720522262885, "yes": 0.24213845217404079}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723557301058157, "res": {"Yes": 0.9723557301058157, "yes": 0.0218937843955128}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.738203645525723, "res": {"Yes": 0.738203645525723, "yes": 0.25172176716201033}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9704371799893091, "res": {"Yes": 0.9704371799893091, "yes": 0.02336943282706368}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7770513059272695, "res": {"Yes": 0.7770513059272695, "yes": 0.16491857693511547}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9689768926210592, "res": {"Yes": 0.9689768926210592, "yes": 0.016319933568696796}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985418162934327, "res": {"Yes": 0.985418162934327, "yes": 0.010011108024726722}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8033442757441793, "res": {"Yes": 0.8033442757441793, "yes": 0.13293163572735636}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7436040847062919, "res": {"Yes": 0.7436040847062919, "yes": 0.208444033132306}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.914823631266514, "res": {"Yes": 0.914823631266514, "yes": 0.07880614523052824}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8238034986696825, "res": {"Yes": 0.8238034986696825, "yes": 0.16599513471514815}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9611008334210119, "res": {"Yes": 0.9611008334210119, "yes": 0.029488895801682388}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6975336658498543, "res": {"Yes": 0.6975336658498543, "yes": 0.2946091548235748}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7693009220198714, "res": {"Yes": 0.7693009220198714, "yes": 0.22174404782036058}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7414591722092299, "res": {"Yes": 0.7414591722092299, "yes": 0.2046282828847218}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9791557031925888, "res": {"Yes": 0.9791557031925888, "yes": 0.017632314663684857}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7828292486630692, "res": {"Yes": 0.7828292486630692, "yes": 0.20528013920345553}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6809477848508978, "res": {"Yes": 0.6809477848508978, "yes": 0.24128470359738524}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9770720065171186, "res": {"Yes": 0.9770720065171186, "yes": 0.017448580157089933}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7218999233609501, "res": {"Yes": 0.7218999233609501, "yes": 0.27204039845039696}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7154444292628372, "res": {"Yes": 0.7154444292628372, "yes": 0.2791801239273385}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7427753242251369, "res": {"Yes": 0.7427753242251369, "yes": 0.2532451973753984}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7431184096527238, "res": {"Yes": 0.7431184096527238, "yes": 0.2531271619805704}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7099437552801233, "res": {"Yes": 0.7099437552801233, "yes": 0.28470244407405276}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7676534842770122, "res": {"Yes": 0.7676534842770122, "yes": 0.2267563468183508}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8718721349993992, "res": {"Yes": 0.8718721349993992, "yes": 0.12409189489304077}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8065751139853902, "res": {"Yes": 0.8065751139853902, "yes": 0.18812088828692033}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.750837821954372, "res": {"Yes": 0.750837821954372, "yes": 0.24514076068754379}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7928935527214426, "res": {"Yes": 0.7928935527214426, "yes": 0.2047055446437503}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8625153699530331, "res": {"Yes": 0.8625153699530331, "yes": 0.129466126951434}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9708255363849585, "res": {"Yes": 0.9708255363849585, "yes": 0.024415663644573135}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5611819469094288, "res": {"Yes": 0.5611819469094288, "yes": 0.4277336671955827}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9815250994996315, "res": {"Yes": 0.9815250994996315, "yes": 0.012752983943184448}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8083856861538641, "res": {"Yes": 0.8083856861538641, "yes": 0.18691399270267584}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8751549522649545, "res": {"Yes": 0.8751549522649545, "yes": 0.11322639149170097}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8340503556240435, "res": {"Yes": 0.8340503556240435, "yes": 0.15565010616804822}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8387529781517632, "res": {"Yes": 0.8387529781517632, "yes": 0.15339100949698276}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8195879377219776, "res": {"Yes": 0.8195879377219776, "yes": 0.16686578038217967}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8888697785666602, "res": {"Yes": 0.8888697785666602, "yes": 0.09708526169643808}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9177815909718691, "res": {"Yes": 0.9177815909718691, "yes": 0.0714214667544481}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8853468817407383, "res": {"Yes": 0.8853468817407383, "yes": 0.10280878216390527}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7955538024423585, "res": {"Yes": 0.7955538024423585, "yes": 0.19016312928543416}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8435213720269725, "res": {"Yes": 0.8435213720269725, "yes": 0.14456500658954194}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.942909174942994, "res": {"Yes": 0.942909174942994, "yes": 0.05097975427873405}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7777968088957554, "res": {"Yes": 0.7777968088957554, "yes": 0.217987839469268}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6298112674213096, "res": {"Yes": 0.6298112674213096, "yes": 0.3660321894980034}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6207439195314196, "res": {"Yes": 0.6207439195314196, "yes": 0.3749347380588842}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6996158152228055, "res": {"Yes": 0.6996158152228055, "yes": 0.2962690158706157}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7413594971384264, "res": {"Yes": 0.7413594971384264, "yes": 0.25102204877080486}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6200679067031951, "res": {"Yes": 0.6200679067031951, "yes": 0.37509944556295993}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8897551112652662, "res": {"Yes": 0.8897551112652662, "yes": 0.10764936004114271}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8715454185536881, "res": {"Yes": 0.8715454185536881, "yes": 0.12557501121962805}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8356050548101404, "res": {"Yes": 0.8356050548101404, "yes": 0.16114614384277692}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6063327204107546, "res": {"Yes": 0.6063327204107546, "yes": 0.3890048140462634}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.856245615502358, "res": {"Yes": 0.856245615502358, "yes": 0.13108291723772333}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8108407067528206, "res": {"Yes": 0.8108407067528206, "yes": 0.1749298399531111}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7844794909347869, "res": {"Yes": 0.7844794909347869, "yes": 0.19985069824210694}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8291307219244756, "res": {"Yes": 0.8291307219244756, "yes": 0.15880278381525367}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8664064055747758, "res": {"Yes": 0.8664064055747758, "yes": 0.12167862879959021}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7118303560031977, "res": {"Yes": 0.7118303560031977, "yes": 0.27856197860819365}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5903935916536234, "res": {"Yes": 0.5903935916536234, "yes": 0.39514111291214365}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7226719675116607, "res": {"Yes": 0.7226719675116607, "yes": 0.2668429755870076}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5203114835045869, "res": {"Yes": 0.5203114835045869, "yes": 0.47300590803876363}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6149500401523729, "res": {"Yes": 0.6149500401523729, "yes": 0.37703949339682646}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9187987946888083, "res": {"Yes": 0.9187987946888083, "yes": 0.07857421396308675}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9126541429597507, "res": {"Yes": 0.9126541429597507, "yes": 0.08365090773702445}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.950712680157188, "res": {"Yes": 0.950712680157188, "yes": 0.04570302457553014}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.927341118484079, "res": {"Yes": 0.927341118484079, "yes": 0.06929489782192105}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8473223908573077, "res": {"Yes": 0.8473223908573077, "yes": 0.14651124438341534}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9402386838130451, "res": {"Yes": 0.9402386838130451, "yes": 0.04825220754576065}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8191992829520092, "res": {"Yes": 0.8191992829520092, "yes": 0.1728498120455191}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8595393180041385, "res": {"Yes": 0.8595393180041385, "yes": 0.12917379929926856}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8800840629435445, "res": {"Yes": 0.8800840629435445, "yes": 0.10695882838673339}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.900683427445797, "res": {"Yes": 0.900683427445797, "yes": 0.0895618348570473}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8125557920477761, "res": {"Yes": 0.8125557920477761, "yes": 0.17778317307573188}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8992060748742856, "res": {"Yes": 0.8992060748742856, "yes": 0.09657837732831014}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8324854373110475, "res": {"Yes": 0.8324854373110475, "yes": 0.15550493679328758}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8040650782354235, "res": {"Yes": 0.8040650782354235, "yes": 0.18831145714551523}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8346103164080415, "res": {"Yes": 0.8346103164080415, "yes": 0.15330987178073358}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9805420901853767, "res": {"Yes": 0.9805420901853767, "yes": 0.014034565892490662}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9791829045158482, "res": {"Yes": 0.9791829045158482, "yes": 0.01948134379170309}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.941415495585739, "res": {"Yes": 0.941415495585739, "yes": 0.05722591063688269}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929949426012845, "res": {"Yes": 0.9929949426012845, "yes": 0.005823851886426485}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9480448829157592, "res": {"Yes": 0.9480448829157592, "yes": 0.05043194879877274}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6452437478051886, "res": {"Yes": 0.6452437478051886, "yes": 0.34547323714665473}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7652871470216722, "res": {"Yes": 0.7652871470216722, "yes": 0.22512062709876043}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8898759215177411, "res": {"Yes": 0.8898759215177411, "yes": 0.10133498165946722}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8226368559262753, "res": {"Yes": 0.8226368559262753, "yes": 0.16721778740311313}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8257113101517238, "res": {"Yes": 0.8257113101517238, "yes": 0.1639260282772851}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7272164611719486, "res": {"Yes": 0.7272164611719486, "yes": 0.2662302348551231}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.810932807270583, "res": {"Yes": 0.810932807270583, "yes": 0.18443636133784405}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7906491832691012, "res": {"Yes": 0.7906491832691012, "yes": 0.20496058225810487}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8118515871269081, "res": {"Yes": 0.8118515871269081, "yes": 0.18353490712772158}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7663134417906189, "res": {"Yes": 0.7663134417906189, "yes": 0.2288525658536909}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8925961857381968, "res": {"Yes": 0.8925961857381968, "yes": 0.10373129762253458}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8735348965049532, "res": {"Yes": 0.8735348965049532, "yes": 0.12177777641513264}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9091754673907917, "res": {"Yes": 0.9091754673907917, "yes": 0.0878444537081486}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8709281908652072, "res": {"Yes": 0.8709281908652072, "yes": 0.12594288102651174}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7783246141397805, "res": {"Yes": 0.7783246141397805, "yes": 0.21690749644282709}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8921274370130777, "res": {"Yes": 0.8921274370130777, "yes": 0.09342004894351452}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.43421868035360184, "res": {"yes": 0.5225563259980869, "Yes": 0.43421868035360184}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8962704993256502, "res": {"Yes": 0.8962704993256502, "yes": 0.10026429795225594}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9873951836299384, "res": {"Yes": 0.9873951836299384, "yes": 0.009688501587205437}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6610944895181918, "res": {"Yes": 0.6610944895181918, "yes": 0.3140044252563821}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9251067987744026, "res": {"Yes": 0.9251067987744026, "yes": 0.06815396105032709}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9344730279965997, "res": {"Yes": 0.9344730279965997, "yes": 0.05776663610398854}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9424400076939284, "res": {"Yes": 0.9424400076939284, "yes": 0.044511321786554946}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9413154547940538, "res": {"Yes": 0.9413154547940538, "yes": 0.05343493678671437}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9197001775697015, "res": {"Yes": 0.9197001775697015, "yes": 0.07330003445757073}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8902414775210822, "res": {"Yes": 0.8902414775210822, "yes": 0.10369205312504143}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8573844681838576, "res": {"Yes": 0.8573844681838576, "yes": 0.1324257060325957}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7182748670518668, "res": {"Yes": 0.7182748670518668, "yes": 0.27422797307711416}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9279542471977319, "res": {"Yes": 0.9279542471977319, "yes": 0.06799681589336043}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8432578296466475, "res": {"Yes": 0.8432578296466475, "yes": 0.15110865974433157}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9587329270202841, "res": {"Yes": 0.9587329270202841, "yes": 0.03836248691978161}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7204347426178506, "res": {"Yes": 0.7204347426178506, "yes": 0.27477442243102346}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9734077693036868, "res": {"Yes": 0.9734077693036868, "yes": 0.017101889919027974}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.761581611774582, "res": {"Yes": 0.761581611774582, "yes": 0.2369161021231483}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9778772901633354, "res": {"Yes": 0.9778772901633354, "yes": 0.020678379358296882}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9806766670550685, "res": {"Yes": 0.9806766670550685, "yes": 0.015083286537429286}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.750547303936138, "res": {"Yes": 0.750547303936138, "yes": 0.24832038290657735}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9816061866373592, "res": {"Yes": 0.9816061866373592, "yes": 0.01737849598501852}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9792285080839938, "res": {"Yes": 0.9792285080839938, "yes": 0.01933054483845579}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877764198908789, "res": {"Yes": 0.9877764198908789, "yes": 0.010608196732898041}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.987047796186768, "res": {"Yes": 0.987047796186768, "yes": 0.009879345641827145}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.856587651172834, "res": {"Yes": 0.856587651172834, "yes": 0.139224380381408}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723254164161061, "res": {"Yes": 0.9723254164161061, "yes": 0.022125570568807085}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.97971685748195, "res": {"Yes": 0.97971685748195, "yes": 0.01683498697592882}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.987441910250408, "res": {"Yes": 0.987441910250408, "yes": 0.009838411636522734}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5551044970420864, "res": {"Yes": 0.5551044970420864, "yes": 0.4394136914918027}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9680204266665001, "res": {"Yes": 0.9680204266665001, "yes": 0.029635322541459334}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8719499966575259, "res": {"Yes": 0.8719499966575259, "yes": 0.12558768237750928}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6724378756425268, "res": {"Yes": 0.6724378756425268, "yes": 0.32551719249608546}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7320396498690526, "res": {"Yes": 0.7320396498690526, "yes": 0.26590581815060044}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7953240558180551, "res": {"Yes": 0.7953240558180551, "yes": 0.19116397717720285}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7689684789334601, "res": {"Yes": 0.7689684789334601, "yes": 0.22346077135462505}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5874629492845977, "res": {"Yes": 0.5874629492845977, "yes": 0.3960337120150452}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8265227056893354, "res": {"Yes": 0.8265227056893354, "yes": 0.16669479724506372}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7501017162356739, "res": {"Yes": 0.7501017162356739, "yes": 0.24241549586364303}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8767675088388764, "res": {"Yes": 0.8767675088388764, "yes": 0.11277136821715765}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.840784663459702, "res": {"Yes": 0.840784663459702, "yes": 0.15371491399438617}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8708204288782412, "res": {"Yes": 0.8708204288782412, "yes": 0.12139703791944514}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8376885804026393, "res": {"Yes": 0.8376885804026393, "yes": 0.15456995530113077}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8556996248253723, "res": {"Yes": 0.8556996248253723, "yes": 0.1352339144914267}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9370635136463693, "res": {"Yes": 0.9370635136463693, "yes": 0.05463552864860096}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8958813022054096, "res": {"Yes": 0.8958813022054096, "yes": 0.09441699407988724}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9303149725618381, "res": {"Yes": 0.9303149725618381, "yes": 0.062070732340726915}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9842815872136453, "res": {"Yes": 0.9842815872136453, " Yes": 0.00849714206219691}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9908185215610714, "res": {"Yes": 0.9908185215610714, " Yes": 0.00485445713084774}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6849936779723135, "res": {"Yes": 0.6849936779723135, "yes": 0.2997142374846287}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.931449918261931, "res": {"Yes": 0.931449918261931, "yes": 0.062249772391839285}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9297069723301905, "res": {"Yes": 0.9297069723301905, "yes": 0.06329765476781189}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8714970491252223, "res": {"Yes": 0.8714970491252223, "yes": 0.11535693209298767}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8333323306622321, "res": {"Yes": 0.8333323306622321, "yes": 0.15573634464339559}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7813021798139761, "res": {"Yes": 0.7813021798139761, "yes": 0.20911093047537196}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8384657544466134, "res": {"Yes": 0.8384657544466134, "yes": 0.1546493473553433}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.850298852597783, "res": {"Yes": 0.850298852597783, "yes": 0.14258217197940987}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8503862593090324, "res": {"Yes": 0.8503862593090324, "yes": 0.14100356573824568}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9100301210173217, "res": {"Yes": 0.9100301210173217, "yes": 0.08160300432349016}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6746274610657207, "res": {"Yes": 0.6746274610657207, "yes": 0.2939434138694363}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5690605703465974, "res": {"Yes": 0.5690605703465974, "yes": 0.40860234398091744}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.382196081390742, "res": {"yes": 0.5821373049017827, "Yes": 0.382196081390742}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7097062549338803, "res": {"Yes": 0.7097062549338803, "yes": 0.2598589747315509}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6472192759985892, "res": {"Yes": 0.6472192759985892, "yes": 0.3391482335723651}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8256730393194317, "res": {"Yes": 0.8256730393194317, "yes": 0.14403199738893246}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5692874032525543, "res": {"Yes": 0.5692874032525543, "yes": 0.4195869097164072}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7436873507172171, "res": {"Yes": 0.7436873507172171, "yes": 0.22782964508013603}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7421670695805168, "res": {"Yes": 0.7421670695805168, "yes": 0.18013482152257915}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8314851997220424, "res": {"Yes": 0.8314851997220424, "yes": 0.08819487713728001}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7802024591237305, "res": {"Yes": 0.7802024591237305, "yes": 0.2062189315547461}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7839610821332518, "res": {"Yes": 0.7839610821332518, "yes": 0.2086560242182007}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.731046799556874, "res": {"Yes": 0.731046799556874, "yes": 0.26019607436716435}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8839944275860957, "res": {"Yes": 0.8839944275860957, "yes": 0.09156080425721218}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.856028832996964, "res": {"Yes": 0.856028832996964, "yes": 0.13590237253913243}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7752054639432621, "res": {"Yes": 0.7752054639432621, "yes": 0.22073409321733067}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7902688539196419, "res": {"Yes": 0.7902688539196419, "yes": 0.20208598577503156}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7421071791146144, "res": {"Yes": 0.7421071791146144, "yes": 0.25168219961030347}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8358373602326993, "res": {"Yes": 0.8358373602326993, "yes": 0.15912061172789907}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6702971080761347, "res": {"Yes": 0.6702971080761347, "yes": 0.3175806745038561}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5571246365960703, "res": {"Yes": 0.5571246365960703, "yes": 0.36131847765382963}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6573067332267828, "res": {"Yes": 0.6573067332267828, "yes": 0.30391567528040314}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8241997198358573, "res": {"Yes": 0.8241997198358573, "yes": 0.14932948584469743}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7037880454213462, "res": {"Yes": 0.7037880454213462, "yes": 0.20982122927158625}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9899064007698998, "res": {"Yes": 0.9899064007698998, "yes": 0.008482447080442277}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8242589901687499, "res": {"Yes": 0.8242589901687499, "yes": 0.16805881022042737}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8286840568994253, "res": {"Yes": 0.8286840568994253, "yes": 0.16253178440505742}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8570866131258984, "res": {"Yes": 0.8570866131258984, "yes": 0.13715478135157996}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8254631633978347, "res": {"Yes": 0.8254631633978347, "yes": 0.16827046220362446}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.886847650143754, "res": {"Yes": 0.886847650143754, "yes": 0.09607951971749651}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7458700721244952, "res": {"Yes": 0.7458700721244952, "yes": 0.24037323927716742}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7174764171300196, "res": {"Yes": 0.7174764171300196, "yes": 0.27076670392235885}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8045252396121464, "res": {"Yes": 0.8045252396121464, "yes": 0.18853049650640785}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735162158899359, "res": {"Yes": 0.9735162158899359, "yes": 0.01831893966425287}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5628577764913788, "res": {"Yes": 0.5628577764913788, "yes": 0.4308511104295682}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.17952663458000923, "res": {"yes": 0.752722127403218, "Yes": 0.17952663458000923}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3316585178798382, "res": {"yes": 0.5859787223777068, "Yes": 0.3316585178798382}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5023163391937302, "res": {"Yes": 0.5023163391937302, "yes": 0.3905996877308764}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2759690933917338, "res": {"yes": 0.6089455464294149, "Yes": 0.2759690933917338}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.16459303604298797, "res": {"yes": 0.66976057580933, "Yes": 0.16459303604298797}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9862720230328841, "res": {"Yes": 0.9862720230328841, "yes": 0.008980704029675327}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7664211930352697, "res": {"Yes": 0.7664211930352697, "yes": 0.22647373523731287}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9766598540879807, "res": {"Yes": 0.9766598540879807, "yes": 0.013229712923429929}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9740899280579827, "res": {"Yes": 0.9740899280579827, "yes": 0.017679022707952005}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9823891382421343, "res": {"Yes": 0.9823891382421343, "yes": 0.013831177443733782}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9556789774211707, "res": {"Yes": 0.9556789774211707, "yes": 0.03677774397685644}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8733426438980204, "res": {"Yes": 0.8733426438980204, "yes": 0.11693355634321209}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9294487263095319, "res": {"Yes": 0.9294487263095319, "yes": 0.06031986459878853}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9040577661708932, "res": {"Yes": 0.9040577661708932, "yes": 0.08449361581148919}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9252280234601078, "res": {"Yes": 0.9252280234601078, "yes": 0.06311736179343366}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5951270302530317, "res": {"Yes": 0.5951270302530317, "yes": 0.39966600826542775}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9333959028183505, "res": {"Yes": 0.9333959028183505, "yes": 0.05546428382898002}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.949665117906267, "res": {"Yes": 0.949665117906267, "yes": 0.04630706674754695}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6484843228483231, "res": {"Yes": 0.6484843228483231, "yes": 0.34586159444745235}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7017139616700517, "res": {"Yes": 0.7017139616700517, "yes": 0.29503065877252627}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.749239714955027, "res": {"Yes": 0.749239714955027, "yes": 0.24122050627958994}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6280575164524638, "res": {"Yes": 0.6280575164524638, "yes": 0.3571424704220516}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7906701199366745, "res": {"Yes": 0.7906701199366745, "yes": 0.1997867162925866}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7222199980951004, "res": {"Yes": 0.7222199980951004, "yes": 0.2685539913891273}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7686817071770754, "res": {"Yes": 0.7686817071770754, "yes": 0.22378021317350288}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7870374546656828, "res": {"Yes": 0.7870374546656828, "yes": 0.18558122911100755}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7522762039356832, "res": {"Yes": 0.7522762039356832, "yes": 0.22638429577735034}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.685388073829593, "res": {"Yes": 0.685388073829593, "yes": 0.26661526894915677}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.668220867583381, "res": {"Yes": 0.668220867583381, "yes": 0.32312833910034783}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.795915957294633, "res": {"Yes": 0.795915957294633, "yes": 0.18683718683636175}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7484036780823584, "res": {"Yes": 0.7484036780823584, "yes": 0.24549590797812995}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.47195468705839216, "res": {"yes": 0.5242508278963102, "Yes": 0.47195468705839216}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7911745939264933, "res": {"Yes": 0.7911745939264933, "yes": 0.20165920707788076}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9677289676575221, "res": {"Yes": 0.9677289676575221, "yes": 0.026321857534403224}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7884501940616491, "res": {"Yes": 0.7884501940616491, "yes": 0.19927539828499363}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7292916779993102, "res": {"Yes": 0.7292916779993102, "yes": 0.263535393325493}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8636243541583024, "res": {"Yes": 0.8636243541583024, "yes": 0.12479488618101693}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9822626038645329, "res": {"Yes": 0.9822626038645329, "yes": 0.011400922566159412}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9634337887511609, "res": {"Yes": 0.9634337887511609, "yes": 0.027131135476966477}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8795721877529363, "res": {"Yes": 0.8795721877529363, "yes": 0.10950006895497291}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9316549482939056, "res": {"Yes": 0.9316549482939056, "yes": 0.06163742156213319}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7437375736197683, "res": {"Yes": 0.7437375736197683, "yes": 0.2477075159480118}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7864586677455995, "res": {"Yes": 0.7864586677455995, "yes": 0.20207620505001547}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7671221449753928, "res": {"Yes": 0.7671221449753928, "yes": 0.2238438877014228}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8708586500256216, "res": {"Yes": 0.8708586500256216, "yes": 0.12437369201761062}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.807636885988426, "res": {"Yes": 0.807636885988426, "yes": 0.18597645487039818}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5581096670386049, "res": {"Yes": 0.5581096670386049, "yes": 0.43401918100251974}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.970183304137783, "res": {"Yes": 0.970183304137783, "yes": 0.026262873266601944}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9665793309986647, "res": {"Yes": 0.9665793309986647, "yes": 0.028842286138351776}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7785764428835578, "res": {"Yes": 0.7785764428835578, "yes": 0.21575734620082765}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5565711465428567, "res": {"Yes": 0.5565711465428567, "yes": 0.33804525565212695}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6683742686064879, "res": {"Yes": 0.6683742686064879, "yes": 0.2561525790840864}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9714283478768994, "res": {"Yes": 0.9714283478768994, "yes": 0.023802092071595476}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7282532430083142, "res": {"Yes": 0.7282532430083142, "yes": 0.2026032998070186}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7814499380262776, "res": {"Yes": 0.7814499380262776, "yes": 0.21126157950969982}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6425252650184347, "res": {"Yes": 0.6425252650184347, "yes": 0.26358083074389527}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9800855560855659, "res": {"Yes": 0.9800855560855659, "yes": 0.017623278655315048}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7419327821593681, "res": {"Yes": 0.7419327821593681, "yes": 0.1844919034876939}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6600104768978542, "res": {"Yes": 0.6600104768978542, "yes": 0.3068534828123816}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9805140059010266, "res": {"Yes": 0.9805140059010266, "yes": 0.017417375135435127}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9162976148842555, "res": {"Yes": 0.9162976148842555, "yes": 0.07442272055198366}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9798826866333754, "res": {"Yes": 0.9798826866333754, "yes": 0.016711095148709137}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9593972848195002, "res": {"Yes": 0.9593972848195002, "yes": 0.030262292905885873}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9639431761610459, "res": {"Yes": 0.9639431761610459, "yes": 0.030247068750814295}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9828407922898921, "res": {"Yes": 0.9828407922898921, "yes": 0.014334973905260678}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8812792798904565, "res": {"Yes": 0.8812792798904565, "yes": 0.11676440578776025}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8645206824233262, "res": {"Yes": 0.8645206824233262, "yes": 0.1329386148214317}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8657801941634519, "res": {"Yes": 0.8657801941634519, "yes": 0.1314314967975299}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.91726215891429, "res": {"Yes": 0.91726215891429, "yes": 0.0806560065849063}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8728963401481955, "res": {"Yes": 0.8728963401481955, "yes": 0.12459365461112488}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7577295130237152, "res": {"Yes": 0.7577295130237152, "yes": 0.2346951361569739}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7936294454961607, "res": {"Yes": 0.7936294454961607, "yes": 0.2012926821405319}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9815899873241302, "res": {"Yes": 0.9815899873241302, "yes": 0.011827433180242234}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9704480702964484, "res": {"Yes": 0.9704480702964484, "yes": 0.021580293903304716}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.961680727978705, "res": {"Yes": 0.961680727978705, "yes": 0.03396589316689324}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9593574610647551, "res": {"Yes": 0.9593574610647551, "yes": 0.023406540198680808}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9168942216468158, "res": {"Yes": 0.9168942216468158, "yes": 0.06631383704641784}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40929836095479283, "res": {"yes": 0.5153746330299763, "Yes": 0.40929836095479283}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35348359573432164, "res": {"yes": 0.5617697559245907, "Yes": 0.35348359573432164}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8950997002357539, "res": {"Yes": 0.8950997002357539, "yes": 0.09260457800991966}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9110358319035654, "res": {"Yes": 0.9110358319035654, "yes": 0.07560899661749593}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8365884047826362, "res": {"Yes": 0.8365884047826362, "yes": 0.15306345576652272}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9357257664452022, "res": {"Yes": 0.9357257664452022, "yes": 0.05570040839662083}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9144203698598884, "res": {"Yes": 0.9144203698598884, "yes": 0.07253877735784396}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8845301136532543, "res": {"Yes": 0.8845301136532543, "yes": 0.10799536170025896}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7570844749703342, "res": {"Yes": 0.7570844749703342, "yes": 0.2161752192308838}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8575660728707238, "res": {"Yes": 0.8575660728707238, "yes": 0.12441563789823701}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7452225875441205, "res": {"Yes": 0.7452225875441205, "yes": 0.2457642588175511}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8061856645201666, "res": {"Yes": 0.8061856645201666, "yes": 0.18132008212156703}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7411411989125106, "res": {"Yes": 0.7411411989125106, "yes": 0.2450772039287874}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.964352082734337, "res": {"Yes": 0.964352082734337, "yes": 0.0328350643349657}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9630797273583592, "res": {"Yes": 0.9630797273583592, "yes": 0.03485025934684481}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7079810862368384, "res": {"Yes": 0.7079810862368384, "yes": 0.2900675941167704}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7020827882006423, "res": {"Yes": 0.7020827882006423, "yes": 0.27075035010733217}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7372858837794053, "res": {"Yes": 0.7372858837794053, "yes": 0.10766182655883294}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7514861723687306, "res": {"Yes": 0.7514861723687306, "yes": 0.22983275195155062}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.838378860358752, "res": {"Yes": 0.838378860358752, "yes": 0.13889187864634667}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9582282812429829, "res": {"Yes": 0.9582282812429829, "yes": 0.037260902217434985}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6050284906963556, "res": {"Yes": 0.6050284906963556, "yes": 0.3660891851469044}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9569742498968645, "res": {"Yes": 0.9569742498968645, "yes": 0.03598729570112019}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9356661719689436, "res": {"Yes": 0.9356661719689436, "yes": 0.05179570749718796}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9424912457801791, "res": {"Yes": 0.9424912457801791, "yes": 0.047763382000456614}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9475807456245757, "res": {"Yes": 0.9475807456245757, "yes": 0.03800454200314156}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9549759844408103, "res": {"Yes": 0.9549759844408103, "yes": 0.03511721194143172}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.971834279887834, "res": {"Yes": 0.971834279887834, "yes": 0.022408032290496448}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.859761339893362, "res": {"Yes": 0.859761339893362, "yes": 0.09721142823431168}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9090083762960273, "res": {"Yes": 0.9090083762960273, "yes": 0.07457290809479289}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9295124793840561, "res": {"Yes": 0.9295124793840561, "yes": 0.05635764374765092}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216387439941087, "res": {"Yes": 0.9216387439941087, "yes": 0.06950843269713575}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9288122403629006, "res": {"Yes": 0.9288122403629006, "yes": 0.0599366864341842}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7849468111438138, "res": {"Yes": 0.7849468111438138, "yes": 0.17637494158622508}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7683616947454189, "res": {"Yes": 0.7683616947454189, "yes": 0.20504959546629034}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8567530913544182, "res": {"Yes": 0.8567530913544182, "yes": 0.10481769247684228}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6939299223602138, "res": {"Yes": 0.6939299223602138, "yes": 0.2727615250953485}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8186050793664094, "res": {"Yes": 0.8186050793664094, "yes": 0.15757664565617868}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9758258040064445, "res": {"Yes": 0.9758258040064445, "yes": 0.01850488921356168}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7321400193852228, "res": {"Yes": 0.7321400193852228, "yes": 0.2606137637677955}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6476712510442458, "res": {"Yes": 0.6476712510442458, "yes": 0.3450558813982262}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.757421346494647, "res": {"Yes": 0.757421346494647, "yes": 0.234839424122806}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7531242937973662, "res": {"Yes": 0.7531242937973662, "yes": 0.23980974480401127}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8905541139919162, "res": {"Yes": 0.8905541139919162, "yes": 0.10018455940100689}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7573680107614001, "res": {"Yes": 0.7573680107614001, "yes": 0.23206501720599487}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8682186197181299, "res": {"Yes": 0.8682186197181299, "yes": 0.12480030239660822}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8851708692601379, "res": {"Yes": 0.8851708692601379, "yes": 0.10866704551013831}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8447334205819909, "res": {"Yes": 0.8447334205819909, "yes": 0.14796371562081925}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7581029762457024, "res": {"Yes": 0.7581029762457024, "yes": 0.23259633399646568}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7139636202296262, "res": {"Yes": 0.7139636202296262, "yes": 0.2790575349394219}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5183515494608827, "res": {"Yes": 0.5183515494608827, "yes": 0.4735957354464851}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7417070982806957, "res": {"Yes": 0.7417070982806957, "yes": 0.24262432974765585}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7562055108467155, "res": {"Yes": 0.7562055108467155, "yes": 0.23662996194875785}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8543202632321342, "res": {"Yes": 0.8543202632321342, "yes": 0.1412892883529448}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7783546736168233, "res": {"Yes": 0.7783546736168233, "yes": 0.2149341020347105}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8228752412621273, "res": {"Yes": 0.8228752412621273, "yes": 0.1676838865068377}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9218970325723945, "res": {"Yes": 0.9218970325723945, "yes": 0.061811985079245686}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6072082536654108, "res": {"Yes": 0.6072082536654108, "yes": 0.3788927331305138}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6725451178612866, "res": {"Yes": 0.6725451178612866, "yes": 0.30422762235756695}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7762387178583546, "res": {"Yes": 0.7762387178583546, "yes": 0.17230553702670096}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6421932359528436, "res": {"Yes": 0.6421932359528436, "yes": 0.3156661219729925}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6049019259255864, "res": {"Yes": 0.6049019259255864, "yes": 0.3263805001057169}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7225174117340866, "res": {"Yes": 0.7225174117340866, "yes": 0.22700982057309155}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8650439189383908, "res": {"Yes": 0.8650439189383908, "yes": 0.13119808481832435}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8138789207913062, "res": {"Yes": 0.8138789207913062, "yes": 0.1839247817112831}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9728047519160901, "res": {"Yes": 0.9728047519160901, "yes": 0.020857660252710263}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9723345456222978, "res": {"Yes": 0.9723345456222978, "yes": 0.019722152157236928}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.96626191981131, "res": {"Yes": 0.96626191981131, "yes": 0.02799756323263305}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7508142460169067, "res": {"Yes": 0.7508142460169067, "yes": 0.23979598011970973}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6708489225793088, "res": {"Yes": 0.6708489225793088, "yes": 0.3187868121754438}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7694000682180402, "res": {"Yes": 0.7694000682180402, "yes": 0.21995381202770384}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7690351744618478, "res": {"Yes": 0.7690351744618478, "yes": 0.22233160397239027}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7461262777351748, "res": {"Yes": 0.7461262777351748, "yes": 0.24399449876211676}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8491551881791475, "res": {"Yes": 0.8491551881791475, "yes": 0.14657239628415322}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8532264345803332, "res": {"Yes": 0.8532264345803332, "yes": 0.13978534262413567}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9034376597019907, "res": {"Yes": 0.9034376597019907, "yes": 0.0884970022874615}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9502700372899217, "res": {"Yes": 0.9502700372899217, "yes": 0.03972021606964885}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9253185429453313, "res": {"Yes": 0.9253185429453313, "yes": 0.06451259518873437}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9209201607981417, "res": {"Yes": 0.9209201607981417, "yes": 0.07500581013347639}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8888791073039387, "res": {"Yes": 0.8888791073039387, "yes": 0.10484033554378047}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.870050702757548, "res": {"Yes": 0.870050702757548, "yes": 0.1242948143700299}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9055832805444244, "res": {"Yes": 0.9055832805444244, "yes": 0.08872081054430159}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8427684352466127, "res": {"Yes": 0.8427684352466127, "yes": 0.1512450817432331}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9434147719973566, "res": {"Yes": 0.9434147719973566, "yes": 0.049095480267356655}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8299457846696805, "res": {"Yes": 0.8299457846696805, "yes": 0.16211926676889352}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9396070058691761, "res": {"Yes": 0.9396070058691761, "yes": 0.05313381160424742}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8532096005888468, "res": {"Yes": 0.8532096005888468, "yes": 0.1365503729270831}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8550353464694045, "res": {"Yes": 0.8550353464694045, "yes": 0.13271298369049203}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9432000818310696, "res": {"Yes": 0.9432000818310696, "yes": 0.0431017275633237}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9517760532941492, "res": {"Yes": 0.9517760532941492, "yes": 0.04420202395985113}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9554606335438471, "res": {"Yes": 0.9554606335438471, "yes": 0.04044025415931847}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9505009990270223, "res": {"Yes": 0.9505009990270223, "yes": 0.045733216821342064}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9274015089040053, "res": {"Yes": 0.9274015089040053, "yes": 0.06447014696982245}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4784807015698217, "res": {"Yes": 0.4784807015698217, "yes": 0.45787237083579974}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.973932508365238, "res": {"Yes": 0.973932508365238, "yes": 0.020520949250060105}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2996702426638648, "res": {"yes": 0.5649966087873456, "Yes": 0.2996702426638648}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5791678424985712, "res": {"Yes": 0.5791678424985712, "yes": 0.3753565893132466}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.622230030908181, "res": {"Yes": 0.622230030908181, "yes": 0.33706236715853494}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6997450392005399, "res": {"Yes": 0.6997450392005399, "yes": 0.2948477669724846}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.890256412337382, "res": {"Yes": 0.890256412337382, "yes": 0.10234887076586924}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8656566301746927, "res": {"Yes": 0.8656566301746927, "yes": 0.12618633692369974}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8662945683905664, "res": {"Yes": 0.8662945683905664, "yes": 0.12836776123199847}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7753997160061641, "res": {"Yes": 0.7753997160061641, "yes": 0.2186702263060529}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9798373632079308, "res": {"Yes": 0.9798373632079308, "yes": 0.015025703164830427}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8863958667167907, "res": {"Yes": 0.8863958667167907, "yes": 0.10993382336497949}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8561545500616391, "res": {"Yes": 0.8561545500616391, "yes": 0.14042056833608}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8936765501512209, "res": {"Yes": 0.8936765501512209, "yes": 0.10100183439288683}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9283600931955057, "res": {"Yes": 0.9283600931955057, "yes": 0.0675959168084075}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9753646869710582, "res": {"Yes": 0.9753646869710582, "yes": 0.018450418976500984}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8230950112089312, "res": {"Yes": 0.8230950112089312, "yes": 0.17309255976797905}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977834540258441, "res": {"Yes": 0.977834540258441, "yes": 0.016353189895930553}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761930626877475, "res": {"Yes": 0.9761930626877475, "yes": 0.01769019584372081}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.982119203992814, "res": {"Yes": 0.982119203992814, "yes": 0.013137739666696091}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9337275366123661, "res": {"Yes": 0.9337275366123661, "yes": 0.061003795221396644}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9443842369346767, "res": {"Yes": 0.9443842369346767, "yes": 0.04822585456375085}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9186211069977697, "res": {"Yes": 0.9186211069977697, "yes": 0.0742753385189971}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9005380958875022, "res": {"Yes": 0.9005380958875022, "yes": 0.0878968158178544}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9214424558470677, "res": {"Yes": 0.9214424558470677, "yes": 0.07040780186587416}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6082662692740595, "res": {"Yes": 0.6082662692740595, "yes": 0.38544693802699}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6723014182231501, "res": {"Yes": 0.6723014182231501, "yes": 0.3242890277120339}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5908209550579764, "res": {"Yes": 0.5908209550579764, "yes": 0.4038562423955885}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9289273366182451, "res": {"Yes": 0.9289273366182451, "yes": 0.06529733860252986}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6855576392492505, "res": {"Yes": 0.6855576392492505, "yes": 0.3088541870584862}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8867305585105888, "res": {"Yes": 0.8867305585105888, "yes": 0.1019413735559741}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8449240054905565, "res": {"Yes": 0.8449240054905565, "yes": 0.1451513337185714}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8195077859411544, "res": {"Yes": 0.8195077859411544, "yes": 0.17131256936772293}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8751881037624317, "res": {"Yes": 0.8751881037624317, "yes": 0.117236068204399}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9349022131772419, "res": {"Yes": 0.9349022131772419, "yes": 0.05386266029720275}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5325742790248822, "res": {"Yes": 0.5325742790248822, "yes": 0.46151424905057364}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6400552360656631, "res": {"Yes": 0.6400552360656631, "yes": 0.35606367562828173}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9836434757268006, "res": {"Yes": 0.9836434757268006, "yes": 0.014077464818055718}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9640710247105325, "res": {"Yes": 0.9640710247105325, "yes": 0.030406599661597047}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9500177693343548, "res": {"Yes": 0.9500177693343548, "yes": 0.04631145223441747}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9735305646511848, "res": {"Yes": 0.9735305646511848, "yes": 0.023137900307259696}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9859756070359262, "res": {"Yes": 0.9859756070359262, "yes": 0.011232656324014472}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9874500448303709, "res": {"Yes": 0.9874500448303709, "yes": 0.009362152939857284}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9841153186957955, "res": {"Yes": 0.9841153186957955, "yes": 0.014910971421207663}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9800628810884456, "res": {"Yes": 0.9800628810884456, "yes": 0.013754874618321476}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7942411885063505, "res": {"Yes": 0.7942411885063505, "yes": 0.2018782471865434}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.824139060968751, "res": {"Yes": 0.824139060968751, "yes": 0.17087089630468724}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9341261832864827, "res": {"Yes": 0.9341261832864827, "yes": 0.05916382429341708}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9136908347007673, "res": {"Yes": 0.9136908347007673, "yes": 0.07070033272061133}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.87947316229506, "res": {"Yes": 0.87947316229506, "yes": 0.11670481277101734}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.28930253630423164, "res": {"yes": 0.541324480943077, "Yes": 0.28930253630423164}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.24036636470083098, "res": {"yes": 0.6833068584500396, "Yes": 0.24036636470083098}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3228215775080118, "res": {"yes": 0.5883462993326168, "Yes": 0.3228215775080118}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3260383064730455, "res": {"yes": 0.6134494342428976, "Yes": 0.3260383064730455}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.643698455072654, "res": {"Yes": 0.643698455072654, "yes": 0.3458697222904236}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6997439336042514, "res": {"Yes": 0.6997439336042514, "yes": 0.29505514733346566}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9088213488854119, "res": {"Yes": 0.9088213488854119, "yes": 0.08312612884567347}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8703743434932932, "res": {"Yes": 0.8703743434932932, "yes": 0.12208081933607594}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8995562885057234, "res": {"Yes": 0.8995562885057234, "yes": 0.095890007461829}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7942832923477057, "res": {"Yes": 0.7942832923477057, "yes": 0.19888846276663036}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8717492968697272, "res": {"Yes": 0.8717492968697272, "yes": 0.12341499000121947}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8499104907863215, "res": {"Yes": 0.8499104907863215, "yes": 0.1468952692795336}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9083192729443768, "res": {"Yes": 0.9083192729443768, "yes": 0.08880698252829129}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7743641174738601, "res": {"Yes": 0.7743641174738601, "yes": 0.22244651239619756}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8904060040059646, "res": {"Yes": 0.8904060040059646, "yes": 0.1058595567177141}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6827716698923718, "res": {"Yes": 0.6827716698923718, "yes": 0.3126192132973904}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8806760392714051, "res": {"Yes": 0.8806760392714051, "yes": 0.11447091565511623}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7260806588385095, "res": {"Yes": 0.7260806588385095, "yes": 0.2699895565975926}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.584863553988794, "res": {"Yes": 0.584863553988794, "yes": 0.40698383979142805}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.868255736857496, "res": {"Yes": 0.868255736857496, "yes": 0.12819688920007447}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7823498656189368, "res": {"Yes": 0.7823498656189368, "yes": 0.21389192818528335}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8981143502305756, "res": {"Yes": 0.8981143502305756, "yes": 0.10017292864883866}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8408673334718045, "res": {"Yes": 0.8408673334718045, "yes": 0.15533402200901464}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8526912455736853, "res": {"Yes": 0.8526912455736853, "yes": 0.14265155508214908}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7841193798553517, "res": {"Yes": 0.7841193798553517, "yes": 0.21214363715128623}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.29325844321383354, "res": {"yes": 0.6467403815511954, "Yes": 0.29325844321383354}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9652268032341634, "res": {"Yes": 0.9652268032341634, "yes": 0.026617017822379605}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3432965253129823, "res": {"yes": 0.5427351941033843, "Yes": 0.3432965253129823}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5514022499329821, "res": {"Yes": 0.5514022499329821, "yes": 0.4037066010024195}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4320851947983641, "res": {"yes": 0.5345058520143917, "Yes": 0.4320851947983641}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.724474669909712, "res": {"Yes": 0.724474669909712, "yes": 0.26706863331507347}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.866345291422448, "res": {"Yes": 0.866345291422448, "yes": 0.12878499120671014}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7467227156617714, "res": {"Yes": 0.7467227156617714, "yes": 0.24922930566657786}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8258277188799924, "res": {"Yes": 0.8258277188799924, "yes": 0.16845945184804195}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7585542044935165, "res": {"Yes": 0.7585542044935165, "yes": 0.23578155901037295}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7708852647858225, "res": {"Yes": 0.7708852647858225, "yes": 0.18987101609711488}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8926306718580141, "res": {"Yes": 0.8926306718580141, "yes": 0.08411789098221792}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8720047393966728, "res": {"Yes": 0.8720047393966728, "yes": 0.11607374002155278}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8545481066505405, "res": {"Yes": 0.8545481066505405, "yes": 0.1261567370812547}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8125758624237077, "res": {"Yes": 0.8125758624237077, "yes": 0.1744970400408743}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9577654978625587, "res": {"Yes": 0.9577654978625587, "yes": 0.03796368249753385}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9393273546756894, "res": {"Yes": 0.9393273546756894, "yes": 0.05712179136559947}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.947368425098156, "res": {"Yes": 0.947368425098156, "yes": 0.04873777900706948}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9406261334960387, "res": {"Yes": 0.9406261334960387, "yes": 0.05527781645674188}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9473541928892113, "res": {"Yes": 0.9473541928892113, "yes": 0.04785207965802163}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9728876773273742, "res": {"Yes": 0.9728876773273742, "yes": 0.0186626066773352}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9758856385999775, "res": {"Yes": 0.9758856385999775, "yes": 0.018293307434247008}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9580061302724168, "res": {"Yes": 0.9580061302724168, "yes": 0.035674158637137826}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7867699388875031, "res": {"Yes": 0.7867699388875031, "yes": 0.20579686272272732}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.973232275810815, "res": {"Yes": 0.973232275810815, "yes": 0.02302024115161009}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7288751672329657, "res": {"Yes": 0.7288751672329657, "yes": 0.2654721341311382}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.634096961575187, "res": {"Yes": 0.634096961575187, "yes": 0.35501227948725556}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.978313737892554, "res": {"Yes": 0.978313737892554, "yes": 0.017283568958185714}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45725697389811065, "res": {"yes": 0.5365580776112617, "Yes": 0.45725697389811065}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7274409875473754, "res": {"Yes": 0.7274409875473754, "yes": 0.26522711030739127}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7373745551114889, "res": {"Yes": 0.7373745551114889, "yes": 0.24825505840764928}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.419894327731819, "res": {"yes": 0.5626099485023408, "Yes": 0.419894327731819}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.759099155850461, "res": {"Yes": 0.759099155850461, "yes": 0.23620152945165007}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5231436956384671, "res": {"Yes": 0.5231436956384671, "yes": 0.47176580140514845}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6335554078393955, "res": {"Yes": 0.6335554078393955, "yes": 0.34288557090469146}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8921160535396082, "res": {"Yes": 0.8921160535396082, "yes": 0.09952970864866258}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9191321255127033, "res": {"Yes": 0.9191321255127033, "yes": 0.07309614131559672}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9059758671442925, "res": {"Yes": 0.9059758671442925, "yes": 0.089705544520941}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9294157779361976, "res": {"Yes": 0.9294157779361976, "yes": 0.06466404180856111}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9088902855991167, "res": {"Yes": 0.9088902855991167, "yes": 0.08395576718183571}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5751915863668476, "res": {"Yes": 0.5751915863668476, "yes": 0.24219180115122882}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3221719080394021, "res": {"yes": 0.6286823863317326, "Yes": 0.3221719080394021}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41997541715543585, "res": {"yes": 0.48128658764243437, "Yes": 0.41997541715543585}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49984286497656266, "res": {"Yes": 0.49984286497656266, "yes": 0.4167781137993572}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9688221244894897, "res": {"Yes": 0.9688221244894897, " Yes": 0.01575528365199542}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9461358884029518, "res": {"Yes": 0.9461358884029518, "yes": 0.03884696601772074}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6097067531162821, "res": {"Yes": 0.6097067531162821, "yes": 0.38248612533784476}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9620467815778516, "res": {"Yes": 0.9620467815778516, "yes": 0.029038469313644066}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5695048385466, "res": {"Yes": 0.5695048385466, "yes": 0.42042822659487084}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.46679583549492953, "res": {"yes": 0.5275459544424356, "Yes": 0.46679583549492953}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9393874077930513, "res": {"Yes": 0.9393874077930513, "yes": 0.048620827037812016}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8705214840082832, "res": {"Yes": 0.8705214840082832, "yes": 0.11824405827717595}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8118840374833616, "res": {"Yes": 0.8118840374833616, "yes": 0.16267253282476635}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910308697860688, "res": {"Yes": 0.9910308697860688, "yes": 0.0064887621393130285}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9095845883646331, "res": {"Yes": 0.9095845883646331, "yes": 0.07348873866089989}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6511480391092823, "res": {"Yes": 0.6511480391092823, "yes": 0.33843609270765523}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6457237488628427, "res": {"Yes": 0.6457237488628427, "yes": 0.34798089504870994}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7563280260629373, "res": {"Yes": 0.7563280260629373, "yes": 0.23724206210179655}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4745343127778868, "res": {"yes": 0.516206138530812, "Yes": 0.4745343127778868}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5441989075919702, "res": {"Yes": 0.5441989075919702, "yes": 0.451078453528121}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8375090159729714, "res": {"Yes": 0.8375090159729714, "yes": 0.15643811163899135}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8403659560950533, "res": {"Yes": 0.8403659560950533, "yes": 0.14955183856497078}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.916717974843032, "res": {"Yes": 0.916717974843032, "yes": 0.06809446136803934}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8355993058671396, "res": {"Yes": 0.8355993058671396, "yes": 0.155230650561567}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9206151417841584, "res": {"Yes": 0.9206151417841584, "yes": 0.06561754267210726}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.716924438192125, "res": {"Yes": 0.716924438192125, "yes": 0.27639712188627996}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7155059815947223, "res": {"Yes": 0.7155059815947223, "yes": 0.28015154222182675}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6032393373349823, "res": {"Yes": 0.6032393373349823, "yes": 0.38814348539570354}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7014460423021492, "res": {"Yes": 0.7014460423021492, "yes": 0.29343395223869767}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6833370886141437, "res": {"Yes": 0.6833370886141437, "yes": 0.30483354666166596}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8185552196495183, "res": {"Yes": 0.8185552196495183, "yes": 0.1538024180488574}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7213347094106141, "res": {"Yes": 0.7213347094106141, "yes": 0.2558005857410762}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5353370663025235, "res": {"Yes": 0.5353370663025235, "yes": 0.4426041389335844}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5450844184911782, "res": {"Yes": 0.5450844184911782, "yes": 0.42596359249148624}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7734998522375481, "res": {"Yes": 0.7734998522375481, "yes": 0.20262682341564528}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8473525306507834, "res": {"Yes": 0.8473525306507834, "yes": 0.1482831288447478}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8611975141428371, "res": {"Yes": 0.8611975141428371, "yes": 0.06159788767628568}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7973162487199406, "res": {"Yes": 0.7973162487199406, "yes": 0.18651625229912833}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877068685569005, "res": {"Yes": 0.9877068685569005, "yes": 0.00967658431484574}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9526364239435496, "res": {"Yes": 0.9526364239435496, "yes": 0.045193635932972445}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8958827266578125, "res": {"Yes": 0.8958827266578125, "yes": 0.09704531932995555}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6106120891945128, "res": {"Yes": 0.6106120891945128, "yes": 0.3825094194521764}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9709730742431101, "res": {"Yes": 0.9709730742431101, "yes": 0.02266945172049964}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8242194678977254, "res": {"Yes": 0.8242194678977254, "yes": 0.170535242967008}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198186756950272, "res": {"Yes": 0.9198186756950272, "yes": 0.07525672579588039}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9425826417024681, "res": {"Yes": 0.9425826417024681, "yes": 0.04848334219167724}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9246663770384298, "res": {"Yes": 0.9246663770384298, "yes": 0.06654722719485069}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8937491017583921, "res": {"Yes": 0.8937491017583921, "yes": 0.09745187622569533}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9161039850079157, "res": {"Yes": 0.9161039850079157, "yes": 0.07390099403765318}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9179212925502926, "res": {"Yes": 0.9179212925502926, "yes": 0.07581096043644844}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7857703773582044, "res": {"Yes": 0.7857703773582044, "yes": 0.20793438620155494}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6737992911692983, "res": {"Yes": 0.6737992911692983, "yes": 0.3164053608386596}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6208312457205236, "res": {"Yes": 0.6208312457205236, "yes": 0.3734393539060196}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8455097576703858, "res": {"Yes": 0.8455097576703858, "yes": 0.14945983267663343}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6143921094028147, "res": {"Yes": 0.6143921094028147, "yes": 0.37928060699575034}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7099041060286065, "res": {"Yes": 0.7099041060286065, "yes": 0.2859264697483028}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7715901837290962, "res": {"Yes": 0.7715901837290962, "yes": 0.22307282306540244}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7098906534732607, "res": {"Yes": 0.7098906534732607, "yes": 0.2843422672721049}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.835233827833303, "res": {"Yes": 0.835233827833303, "yes": 0.15901360728736846}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7870823485624587, "res": {"Yes": 0.7870823485624587, "yes": 0.20431419467659115}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7919020730114268, "res": {"Yes": 0.7919020730114268, "yes": 0.18606098177454947}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9024686556138464, "res": {"Yes": 0.9024686556138464, "yes": 0.09023751764295417}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7938152479594578, "res": {"Yes": 0.7938152479594578, "yes": 0.18012903928761362}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7946266955134853, "res": {"Yes": 0.7946266955134853, "yes": 0.182260754542316}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9793557494686909, "res": {"Yes": 0.9793557494686909, "yes": 0.015721501205617928}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4279666957911726, "res": {"yes": 0.4297400663228772, "Yes": 0.4279666957911726}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.47116948920685975, "res": {"yes": 0.5009984358289751, "Yes": 0.47116948920685975}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3552340538976473, "res": {"yes": 0.48024360183201986, "Yes": 0.3552340538976473}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4223353129532994, "res": {"yes": 0.5683245685259637, "Yes": 0.4223353129532994}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.08642276103160415, "res": {"yes": 0.8457489608456441, "Yes": 0.08642276103160415}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.956259833973896, "res": {"Yes": 0.956259833973896, "yes": 0.03989792346198123}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9271682188700396, "res": {"Yes": 0.9271682188700396, "yes": 0.06778661651872628}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9014504065485679, "res": {"Yes": 0.9014504065485679, "yes": 0.09084256733317427}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9271665814924109, "res": {"Yes": 0.9271665814924109, "yes": 0.0689672034293392}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9381380987503276, "res": {"Yes": 0.9381380987503276, "yes": 0.05863447176371665}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9662505663004542, "res": {"Yes": 0.9662505663004542, "yes": 0.022268951056769418}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7726295747282543, "res": {"Yes": 0.7726295747282543, "yes": 0.2207016697605983}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7527403736087294, "res": {"Yes": 0.7527403736087294, "yes": 0.239679491556158}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.747555192741196, "res": {"Yes": 0.747555192741196, "yes": 0.24413720406137157}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7942538170416443, "res": {"Yes": 0.7942538170416443, "yes": 0.19983359175450555}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.806043965658335, "res": {"Yes": 0.806043965658335, "yes": 0.13599515756767971}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9107473161088269, "res": {"Yes": 0.9107473161088269, "yes": 0.08370459538058289}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898986923987696, "res": {"Yes": 0.9898986923987696, "yes": 0.008316795122340473}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6503802416349717, "res": {"Yes": 0.6503802416349717, "yes": 0.2463232700098318}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8851340699313722, "res": {"Yes": 0.8851340699313722, "yes": 0.09384488681116662}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8419525670470067, "res": {"Yes": 0.8419525670470067, "yes": 0.1477642323637624}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8444861272880471, "res": {"Yes": 0.8444861272880471, "yes": 0.14713730213602913}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8457123068799782, "res": {"Yes": 0.8457123068799782, "yes": 0.14605986603239418}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7627764338532169, "res": {"Yes": 0.7627764338532169, "yes": 0.21577007625978656}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.811929780318597, "res": {"Yes": 0.811929780318597, "yes": 0.17473701618483953}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.2129453327435674, "res": {"yes": 0.7431438098740529, "Yes": 0.2129453327435674}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6121222080795462, "res": {"Yes": 0.6121222080795462, "yes": 0.3742809463960322}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6642578012251258, "res": {"Yes": 0.6642578012251258, "yes": 0.3176550606080977}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3445001682980831, "res": {"yes": 0.6446719475636201, "Yes": 0.3445001682980831}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7543144164118045, "res": {"Yes": 0.7543144164118045, "yes": 0.20907578188228107}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.97550999006761, "res": {"Yes": 0.97550999006761, "yes": 0.019346559092423225}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8442885068812587, "res": {"Yes": 0.8442885068812587, "yes": 0.1348077890369549}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8730712337194849, "res": {"Yes": 0.8730712337194849, "yes": 0.11105572240309342}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8768358730666854, "res": {"Yes": 0.8768358730666854, "yes": 0.10399083590721607}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198076747294517, "res": {"Yes": 0.9198076747294517, "yes": 0.06335822793909832}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8603874310146055, "res": {"Yes": 0.8603874310146055, "yes": 0.13322109001011337}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7909231432247564, "res": {"Yes": 0.7909231432247564, "yes": 0.2007727715920923}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723150524547274, "res": {"Yes": 0.9723150524547274, "yes": 0.021771543629049664}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9849626542458876, "res": {"Yes": 0.9849626542458876, "yes": 0.00867841821683387}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9656383632764421, "res": {"Yes": 0.9656383632764421, "yes": 0.027466230794308242}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6301299443452061, "res": {"Yes": 0.6301299443452061, "yes": 0.23714305792868356}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7039152454903668, "res": {"Yes": 0.7039152454903668, "yes": 0.2091428850668172}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5936185426493624, "res": {"Yes": 0.5936185426493624, "yes": 0.17973872686149722}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7229192864889384, "res": {"Yes": 0.7229192864889384, "yes": 0.20908841044088539}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7802017335357809, "res": {"Yes": 0.7802017335357809, "yes": 0.17807624102996236}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9337734818061625, "res": {"Yes": 0.9337734818061625, "yes": 0.053973947637083054}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831196509981593, "res": {"Yes": 0.9831196509981593, "yes": 0.01050671187338429}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8619372817122704, "res": {"Yes": 0.8619372817122704, "yes": 0.13442114962394347}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7309992538192349, "res": {"Yes": 0.7309992538192349, "yes": 0.26157014252281047}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9557531342970558, "res": {"Yes": 0.9557531342970558, "yes": 0.032120539296230224}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9633030259405906, "res": {"Yes": 0.9633030259405906, "yes": 0.030993765736362436}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6872302294329352, "res": {"Yes": 0.6872302294329352, "yes": 0.30466556870972256}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7074394402133187, "res": {"Yes": 0.7074394402133187, "yes": 0.28791637661045516}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7687949730839053, "res": {"Yes": 0.7687949730839053, "yes": 0.22160115766558014}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580809233508843, "res": {"Yes": 0.7580809233508843, "yes": 0.23517153381040934}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9744087920766915, "res": {"Yes": 0.9744087920766915, " Yes": 0.012376917908882202}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7914990182723459, "res": {"Yes": 0.7914990182723459, "yes": 0.17940881402323686}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8485006060904323, "res": {"Yes": 0.8485006060904323, "yes": 0.13072303899694943}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.897127138059467, "res": {"Yes": 0.897127138059467, "yes": 0.0940494566968302}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7798386497435323, "res": {"Yes": 0.7798386497435323, "yes": 0.20985450955760565}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7918679823609902, "res": {"Yes": 0.7918679823609902, "yes": 0.1774928593304686}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8764589059285363, "res": {"Yes": 0.8764589059285363, "yes": 0.11937637045450236}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8963439508063961, "res": {"Yes": 0.8963439508063961, "yes": 0.1024295124890264}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994233310156827, "res": {"Yes": 0.994233310156827, "yes": 0.004627895730718088}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959170151412123, "res": {"Yes": 0.9959170151412123, "yes": 0.0031366246796937984}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9769141801221877, "res": {"Yes": 0.9769141801221877, "yes": 0.01812489266844315}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7744930908132961, "res": {"Yes": 0.7744930908132961, "yes": 0.22164302207823244}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9786487258314163, "res": {"Yes": 0.9786487258314163, "yes": 0.015106281927281648}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8047539181789293, "res": {"Yes": 0.8047539181789293, "yes": 0.190112647676339}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6887591043123876, "res": {"Yes": 0.6887591043123876, "yes": 0.3066126973490331}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9091029508109554, "res": {"Yes": 0.9091029508109554, "yes": 0.08152018668653004}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8457918498628905, "res": {"Yes": 0.8457918498628905, "yes": 0.1460482547345998}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7780375898253792, "res": {"Yes": 0.7780375898253792, "yes": 0.21519719180778055}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6932792311463464, "res": {"Yes": 0.6932792311463464, "yes": 0.2967380250414811}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917231909674571, "res": {"Yes": 0.9917231909674571, "yes": 0.0046564496448932}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7019601801487428, "res": {"Yes": 0.7019601801487428, "yes": 0.2938162520676948}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9721015282931874, "res": {"Yes": 0.9721015282931874, "yes": 0.02176179216199557}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723859406952983, "res": {"Yes": 0.9723859406952983, "yes": 0.023131193602144502}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7791718298218763, "res": {"Yes": 0.7791718298218763, "yes": 0.2173053873074824}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8556361770505468, "res": {"Yes": 0.8556361770505468, "yes": 0.1349413553148651}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9879399929098247, "res": {"Yes": 0.9879399929098247, "yes": 0.006785937654726382}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912064068076387, "res": {"Yes": 0.9912064068076387, "yes": 0.007255362705712864}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9885078678700052, "res": {"Yes": 0.9885078678700052, "yes": 0.009491458405810338}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894149620064593, "res": {"Yes": 0.9894149620064593, "yes": 0.008524575941349356}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7019097320834685, "res": {"Yes": 0.7019097320834685, "yes": 0.2930502157023741}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9161652194447056, "res": {"Yes": 0.9161652194447056, "yes": 0.07949695976522089}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8205187966001587, "res": {"Yes": 0.8205187966001587, "yes": 0.17096293506155846}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.862195962041556, "res": {"Yes": 0.862195962041556, "yes": 0.1321286248715816}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8476349918438548, "res": {"Yes": 0.8476349918438548, "yes": 0.14089549973678953}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8833316449934282, "res": {"Yes": 0.8833316449934282, "yes": 0.11196059499451462}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7771972498662295, "res": {"Yes": 0.7771972498662295, "yes": 0.21829830063416236}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7508527487586444, "res": {"Yes": 0.7508527487586444, "yes": 0.24239590948287765}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.806789909336501, "res": {"Yes": 0.806789909336501, "yes": 0.18944942057138162}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7657507376237853, "res": {"Yes": 0.7657507376237853, "yes": 0.22952438555146595}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7383855982343644, "res": {"Yes": 0.7383855982343644, "yes": 0.253822011429972}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5098349311837312, "res": {"Yes": 0.5098349311837312, "yes": 0.4842572525188784}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8335872193286347, "res": {"Yes": 0.8335872193286347, "yes": 0.16085069572554342}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7623099099551975, "res": {"Yes": 0.7623099099551975, "yes": 0.2275727298575706}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8005209626040347, "res": {"Yes": 0.8005209626040347, "yes": 0.18707510719694556}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5118385682733932, "res": {"Yes": 0.5118385682733932, "yes": 0.4834233539626642}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7977259831594532, "res": {"Yes": 0.7977259831594532, "yes": 0.1833664260473698}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7169602351229869, "res": {"Yes": 0.7169602351229869, "yes": 0.26171135003014834}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7916104691800785, "res": {"Yes": 0.7916104691800785, "yes": 0.18995389389115905}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7597244327041901, "res": {"Yes": 0.7597244327041901, "yes": 0.2210234885677595}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7717134473898515, "res": {"Yes": 0.7717134473898515, "yes": 0.20816790278161731}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8802541732284674, "res": {"Yes": 0.8802541732284674, "yes": 0.11628860542114201}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7693063609966169, "res": {"Yes": 0.7693063609966169, "yes": 0.22834642391883228}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.765781061953413, "res": {"Yes": 0.765781061953413, "yes": 0.23082403776723992}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8092364097118694, "res": {"Yes": 0.8092364097118694, "yes": 0.18748363825861417}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8439524709602619, "res": {"Yes": 0.8439524709602619, "yes": 0.1504587528196693}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9681606352178513, "res": {"Yes": 0.9681606352178513, "yes": 0.0267384412988559}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8187399229137673, "res": {"Yes": 0.8187399229137673, "yes": 0.17895163592723742}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6894744763882068, "res": {"Yes": 0.6894744763882068, "yes": 0.3062639399362306}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6527561367945894, "res": {"Yes": 0.6527561367945894, "yes": 0.34417256670275426}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6046533865034864, "res": {"Yes": 0.6046533865034864, "yes": 0.39051011786986073}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7162273460534839, "res": {"Yes": 0.7162273460534839, "yes": 0.27633913985263625}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9642361321538212, "res": {"Yes": 0.9642361321538212, "yes": 0.03120296988734193}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7746385620207279, "res": {"Yes": 0.7746385620207279, "yes": 0.1792595183385138}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7374429423987995, "res": {"Yes": 0.7374429423987995, "yes": 0.23950492468104534}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6084215308033846, "res": {"Yes": 0.6084215308033846, "yes": 0.33994902468813587}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9155447789943426, "res": {"Yes": 0.9155447789943426, "yes": 0.0784544717619964}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9004804903080424, "res": {"Yes": 0.9004804903080424, "yes": 0.09543164947914558}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7709692881496015, "res": {"Yes": 0.7709692881496015, "yes": 0.2234745593095717}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9114955654387941, "res": {"Yes": 0.9114955654387941, "yes": 0.0834947769983234}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8931071915312042, "res": {"Yes": 0.8931071915312042, "yes": 0.10277450115173012}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9061144413140908, "res": {"Yes": 0.9061144413140908, "yes": 0.0909525078915138}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9764156571519319, "res": {"Yes": 0.9764156571519319, "yes": 0.021562167599171036}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902776694372406, "res": {"Yes": 0.9902776694372406, "yes": 0.008277756511638068}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9000468729125957, "res": {"Yes": 0.9000468729125957, "yes": 0.09684575701010165}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9813550404719302, "res": {"Yes": 0.9813550404719302, "yes": 0.015859514101061737}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8764146896920961, "res": {"Yes": 0.8764146896920961, "yes": 0.12112859852836609}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.918421571096945, "res": {"Yes": 0.918421571096945, "yes": 0.07881275734348737}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8182988392003772, "res": {"Yes": 0.8182988392003772, "yes": 0.17818497194878746}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8676178554172547, "res": {"Yes": 0.8676178554172547, "yes": 0.1296331163906466}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9256664207572837, "res": {"Yes": 0.9256664207572837, "yes": 0.07219413276702753}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7557984944983479, "res": {"Yes": 0.7557984944983479, "yes": 0.17144523376880222}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8174749163871028, "res": {"Yes": 0.8174749163871028, "yes": 0.14892421896002855}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7746278333509401, "res": {"Yes": 0.7746278333509401, "yes": 0.16110555624136597}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8646664701918179, "res": {"Yes": 0.8646664701918179, "yes": 0.10837886297449541}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923961665294451, "res": {"Yes": 0.9923961665294451, "yes": 0.005541533563573821}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.944743408268457, "res": {"Yes": 0.944743408268457, "yes": 0.04524814971266859}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8846225607261698, "res": {"Yes": 0.8846225607261698, "yes": 0.0964287100124312}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.81605681517654, "res": {"Yes": 0.81605681517654, "yes": 0.11866000215387362}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.941424472966709, "res": {"Yes": 0.941424472966709, "yes": 0.046309993446648115}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9424636198816698, "res": {"Yes": 0.9424636198816698, "yes": 0.0460688989417384}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9196670009039069, "res": {"Yes": 0.9196670009039069, "yes": 0.06738781929650209}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9171621736161444, "res": {"Yes": 0.9171621736161444, "yes": 0.06760326488392525}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9164526783879484, "res": {"Yes": 0.9164526783879484, "yes": 0.07238156059089638}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8957771083106635, "res": {"Yes": 0.8957771083106635, "yes": 0.09244691417672626}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9298605186771415, "res": {"Yes": 0.9298605186771415, "yes": 0.056578144948102}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8868619019000044, "res": {"Yes": 0.8868619019000044, "yes": 0.10852312446506972}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8208938429676468, "res": {"Yes": 0.8208938429676468, "yes": 0.17392390552340892}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8131112174166177, "res": {"Yes": 0.8131112174166177, "yes": 0.1775358888145331}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8969937989916709, "res": {"Yes": 0.8969937989916709, "yes": 0.09806729032194038}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9104118494885223, "res": {"Yes": 0.9104118494885223, "yes": 0.0862098274732984}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9679363141860549, "res": {"Yes": 0.9679363141860549, "yes": 0.02482446301749635}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.974397579619232, "res": {"Yes": 0.974397579619232, "yes": 0.02199622788714366}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765450202905361, "res": {"Yes": 0.9765450202905361, "yes": 0.018112644381386735}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7732417629340359, "res": {"Yes": 0.7732417629340359, "yes": 0.2230477287843874}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8012185601655817, "res": {"Yes": 0.8012185601655817, "yes": 0.19617036266782636}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7721056082979424, "res": {"Yes": 0.7721056082979424, "yes": 0.14629063285075933}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7753071233008991, "res": {"Yes": 0.7753071233008991, "yes": 0.11913913108912788}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9783086046938383, "res": {"Yes": 0.9783086046938383, "yes": 0.01784258255956268}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9778240540170586, "res": {"Yes": 0.9778240540170586, "yes": 0.010099756139806721}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9753848752782772, "res": {"Yes": 0.9753848752782772, "yes": 0.014300554017274463}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.817573313589464, "res": {"Yes": 0.817573313589464, "yes": 0.17396586102905176}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9145154562334014, "res": {"Yes": 0.9145154562334014, "yes": 0.07534707300394236}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8731432912618173, "res": {"Yes": 0.8731432912618173, "yes": 0.12000311238437249}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9729240007668709, "res": {"Yes": 0.9729240007668709, "yes": 0.020007061354837888}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8986460026196662, "res": {"Yes": 0.8986460026196662, "yes": 0.08969245743672143}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8923037566587506, "res": {"Yes": 0.8923037566587506, "yes": 0.09745949726039474}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.916190331877535, "res": {"Yes": 0.916190331877535, "yes": 0.07457618937493668}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9066452445263297, "res": {"Yes": 0.9066452445263297, "yes": 0.08257180504557426}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8611087637004183, "res": {"Yes": 0.8611087637004183, "yes": 0.12572848224400707}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8854070687821164, "res": {"Yes": 0.8854070687821164, "yes": 0.10501517662328777}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8431509534901593, "res": {"Yes": 0.8431509534901593, "yes": 0.15140236760850084}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8155955878458004, "res": {"Yes": 0.8155955878458004, "yes": 0.17925553882137948}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845654260413448, "res": {"Yes": 0.9845654260413448, "yes": 0.011109182060033595}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9822831981981752, "res": {"Yes": 0.9822831981981752, "yes": 0.013513936401267664}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9855729428945585, "res": {"Yes": 0.9855729428945585, "yes": 0.012066744482661065}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5585646559663349, "res": {"Yes": 0.5585646559663349, "yes": 0.3959260609237727}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6309247996556343, "res": {"Yes": 0.6309247996556343, "yes": 0.35223278533028846}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.766637990598278, "res": {"Yes": 0.766637990598278, "yes": 0.21166511489270404}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735581334800598, "res": {"Yes": 0.9735581334800598, "yes": 0.022116857022805587}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5426388400237578, "res": {"Yes": 0.5426388400237578, "yes": 0.42919919663517664}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8235461579451677, "res": {"Yes": 0.8235461579451677, "yes": 0.11617335079269508}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7862785107025961, "res": {"Yes": 0.7862785107025961, "yes": 0.1865988973034137}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5949461391326782, "res": {"Yes": 0.5949461391326782, "yes": 0.2698510065201105}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964040403046844, "res": {"Yes": 0.9964040403046844, "yes": 0.0016899076441033375}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9338324187793506, "res": {"Yes": 0.9338324187793506, "yes": 0.06357773172855921}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8186535504081297, "res": {"Yes": 0.8186535504081297, "yes": 0.1686742059590182}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.915356474462891, "res": {"Yes": 0.915356474462891, "yes": 0.06963655040412099}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.887508091935792, "res": {"Yes": 0.887508091935792, "yes": 0.0917404761146903}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8619180693443836, "res": {"Yes": 0.8619180693443836, "yes": 0.12269915671834664}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9055397900461387, "res": {"Yes": 0.9055397900461387, "yes": 0.07882955413171149}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8876423731924706, "res": {"Yes": 0.8876423731924706, "yes": 0.10711151403257227}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6464586904677881, "res": {"Yes": 0.6464586904677881, "yes": 0.3467032334402229}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770921539496122, "res": {"Yes": 0.9770921539496122, "yes": 0.016556051408043407}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7972951041734023, "res": {"Yes": 0.7972951041734023, "yes": 0.19810012386753756}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.91051312933574, "res": {"Yes": 0.91051312933574, "yes": 0.08274830899826893}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8684656283170622, "res": {"Yes": 0.8684656283170622, "yes": 0.12762861215733515}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8281863082036667, "res": {"Yes": 0.8281863082036667, "yes": 0.16777684267251405}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.883128648676754, "res": {"Yes": 0.883128648676754, "yes": 0.11391627844458975}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9191198331218561, "res": {"Yes": 0.9191198331218561, "yes": 0.07502971829472906}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8947568478485921, "res": {"Yes": 0.8947568478485921, "yes": 0.09925221919673248}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7652235466498333, "res": {"Yes": 0.7652235466498333, "yes": 0.22733047118075994}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8451848399866081, "res": {"Yes": 0.8451848399866081, "yes": 0.14784738791035215}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5786493488386067, "res": {"Yes": 0.5786493488386067, "yes": 0.4127839485076275}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6655727705116421, "res": {"Yes": 0.6655727705116421, "yes": 0.3277905964288976}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7429413456337193, "res": {"Yes": 0.7429413456337193, "yes": 0.2517561295284061}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8939298718555878, "res": {"Yes": 0.8939298718555878, "yes": 0.09616903063607299}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8900827927684808, "res": {"Yes": 0.8900827927684808, "yes": 0.0967629408946239}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9169624741252875, "res": {"Yes": 0.9169624741252875, "yes": 0.07245337698880952}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9016570697110544, "res": {"Yes": 0.9016570697110544, "yes": 0.08734230850747308}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8789821576211171, "res": {"Yes": 0.8789821576211171, "yes": 0.10573977035145621}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.37929929081862973, "res": {"yes": 0.5871498562486978, "Yes": 0.37929929081862973}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6091526718484547, "res": {"Yes": 0.6091526718484547, "yes": 0.31729777268651227}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3782276938608904, "res": {"yes": 0.5570509338821906, "Yes": 0.3782276938608904}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48021352031496406, "res": {"Yes": 0.48021352031496406, "yes": 0.47323221980748537}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7587384948638742, "res": {"Yes": 0.7587384948638742, "yes": 0.23873376546156838}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8222146801160476, "res": {"Yes": 0.8222146801160476, "yes": 0.1703733966025317}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9591123017335713, "res": {"Yes": 0.9591123017335713, "yes": 0.03429798407023523}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7958948100885896, "res": {"Yes": 0.7958948100885896, "yes": 0.1966077602046509}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9591936791506905, "res": {"Yes": 0.9591936791506905, "yes": 0.03552533411415967}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.977831804281225, "res": {"Yes": 0.977831804281225, "yes": 0.017818304631313977}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.44293127784004394, "res": {"yes": 0.5154661974665045, "Yes": 0.44293127784004394}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.503411274414068, "res": {"Yes": 0.503411274414068, "yes": 0.48992652896866395}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4891890641389685, "res": {"Yes": 0.4891890641389685, "yes": 0.4416417128648443}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5230140923815612, "res": {"Yes": 0.5230140923815612, "yes": 0.4374758389353802}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5463595191200622, "res": {"Yes": 0.5463595191200622, "yes": 0.30519071582653906}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8725057802492434, "res": {"Yes": 0.8725057802492434, "yes": 0.1199008653152678}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8095182267974653, "res": {"Yes": 0.8095182267974653, "yes": 0.18253694282622332}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9317892690797497, "res": {"Yes": 0.9317892690797497, "yes": 0.061748499707748636}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9167809793675706, "res": {"Yes": 0.9167809793675706, "yes": 0.07518503278125763}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9396357432492934, "res": {"Yes": 0.9396357432492934, "yes": 0.05486067991012442}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8343419405426757, "res": {"Yes": 0.8343419405426757, "yes": 0.16167238600306183}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7813054769161317, "res": {"Yes": 0.7813054769161317, "yes": 0.21398073315486932}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368766005556815, "res": {"Yes": 0.8368766005556815, "yes": 0.15844947598218959}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6312447911942988, "res": {"Yes": 0.6312447911942988, "yes": 0.3645234845924522}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7704937005887766, "res": {"Yes": 0.7704937005887766, "yes": 0.22665870388869047}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7439047623029871, "res": {"Yes": 0.7439047623029871, "yes": 0.2301073591622615}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8378786735067797, "res": {"Yes": 0.8378786735067797, "yes": 0.15321912387422115}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9730914962663024, "res": {"Yes": 0.9730914962663024, "yes": 0.020040356770595822}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.619001056199079, "res": {"Yes": 0.619001056199079, "yes": 0.34448979899907606}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7167240069725546, "res": {"Yes": 0.7167240069725546, "yes": 0.2675738435732365}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9452935980254483, "res": {"Yes": 0.9452935980254483, "yes": 0.045090901202506854}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9767827920473103, "res": {"Yes": 0.9767827920473103, "yes": 0.020057205765381777}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5984570974572639, "res": {"Yes": 0.5984570974572639, "yes": 0.36618972704091596}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7482131812064526, "res": {"Yes": 0.7482131812064526, "yes": 0.1993136429033783}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7054396313177594, "res": {"Yes": 0.7054396313177594, "yes": 0.2131363877742429}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9335731164506118, "res": {"Yes": 0.9335731164506118, "yes": 0.06068265932904103}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9390260949602527, "res": {"Yes": 0.9390260949602527, "yes": 0.05554360450208708}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9143343635248408, "res": {"Yes": 0.9143343635248408, "yes": 0.08110018797872628}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9654872063539136, "res": {"Yes": 0.9654872063539136, "yes": 0.030741492182611293}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.941675456289994, "res": {"Yes": 0.941675456289994, "yes": 0.052013805773198876}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8814956957731315, "res": {"Yes": 0.8814956957731315, "yes": 0.1099509413938814}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7301826869884391, "res": {"Yes": 0.7301826869884391, "yes": 0.26242616833451854}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824042131191222, "res": {"Yes": 0.9824042131191222, "yes": 0.013860149483960572}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.87201253515389, "res": {"Yes": 0.87201253515389, "yes": 0.12117432108875516}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.758270876739888, "res": {"Yes": 0.758270876739888, "yes": 0.23496096147985007}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8114969098361927, "res": {"Yes": 0.8114969098361927, "yes": 0.1367221793580685}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5951998008837627, "res": {"Yes": 0.5951998008837627, "yes": 0.3617823829473026}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9468051282067854, "res": {"Yes": 0.9468051282067854, "yes": 0.050462979032848986}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.844404005005429, "res": {"Yes": 0.844404005005429, "yes": 0.1495771448769096}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8907588456334034, "res": {"Yes": 0.8907588456334034, "yes": 0.10835757945591715}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8377872240405119, "res": {"Yes": 0.8377872240405119, "yes": 0.13410628792134807}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7174984439941361, "res": {"Yes": 0.7174984439941361, "yes": 0.2403639610492023}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9208338322632881, "res": {"Yes": 0.9208338322632881, "yes": 0.06424003914424677}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894430875159153, "res": {"Yes": 0.9894430875159153, "yes": 0.00462005162365324}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.993112150725154, "res": {"Yes": 0.993112150725154, "yes": 0.0048099225349218806}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8799742881043319, "res": {"Yes": 0.8799742881043319, "yes": 0.11131030916139448}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7967593265588645, "res": {"Yes": 0.7967593265588645, "yes": 0.1971957675050639}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8884066425577536, "res": {"Yes": 0.8884066425577536, "yes": 0.10278405962479935}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8754635688008361, "res": {"Yes": 0.8754635688008361, "yes": 0.11748800919612477}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8254387135410389, "res": {"Yes": 0.8254387135410389, "yes": 0.16387018819234966}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9323559841324295, "res": {"Yes": 0.9323559841324295, "yes": 0.06370905982914893}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9719907014275678, "res": {"Yes": 0.9719907014275678, "yes": 0.024745139329051983}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9733221657483079, "res": {"Yes": 0.9733221657483079, "yes": 0.022505928850866858}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9752959749492704, "res": {"Yes": 0.9752959749492704, "yes": 0.021396280082172977}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9694352165868557, "res": {"Yes": 0.9694352165868557, "yes": 0.02605033632806205}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5856311643150146, "res": {"Yes": 0.5856311643150146, "yes": 0.40875924470955527}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7957852706656051, "res": {"Yes": 0.7957852706656051, "yes": 0.19792440779437026}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5920245176537785, "res": {"Yes": 0.5920245176537785, "yes": 0.40267934649958015}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7596599804174242, "res": {"Yes": 0.7596599804174242, "yes": 0.23386847366602734}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8070640547067361, "res": {"Yes": 0.8070640547067361, "yes": 0.18844667563356388}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6725306717473067, "res": {"Yes": 0.6725306717473067, "yes": 0.31343900476247455}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7603551516895493, "res": {"Yes": 0.7603551516895493, "yes": 0.23417359024426826}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7021716985733536, "res": {"Yes": 0.7021716985733536, "yes": 0.29153083451605233}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5444766069361932, "res": {"Yes": 0.5444766069361932, "yes": 0.4420669932282769}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.967312955903348, "res": {"Yes": 0.967312955903348, "yes": 0.023940037797754134}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8870865990856801, "res": {"Yes": 0.8870865990856801, "yes": 0.10399789712469978}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7901545893048959, "res": {"Yes": 0.7901545893048959, "yes": 0.19128362590592382}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8104563863905748, "res": {"Yes": 0.8104563863905748, "yes": 0.1801506200391815}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8904569331242268, "res": {"Yes": 0.8904569331242268, "yes": 0.09724703357827168}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9184281066080983, "res": {"Yes": 0.9184281066080983, "yes": 0.07342307642951756}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8289566807808898, "res": {"Yes": 0.8289566807808898, "yes": 0.16289333156013966}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8590149097905746, "res": {"Yes": 0.8590149097905746, "yes": 0.1310076841326663}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.866075354301719, "res": {"Yes": 0.866075354301719, "yes": 0.11983642393346979}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8770757906416676, "res": {"Yes": 0.8770757906416676, "yes": 0.11091320727137885}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8785389371732223, "res": {"Yes": 0.8785389371732223, "yes": 0.11163745132294958}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9403193005129468, "res": {"Yes": 0.9403193005129468, "yes": 0.0471929340465855}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231791444351409, "res": {"Yes": 0.9231791444351409, "yes": 0.05871752762741787}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.931438633814527, "res": {"Yes": 0.931438633814527, "yes": 0.06158797121466638}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5435424883714323, "res": {"Yes": 0.5435424883714323, "yes": 0.44488137739635863}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5059204173644564, "res": {"Yes": 0.5059204173644564, "yes": 0.49003324168746776}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9771643090067047, "res": {"Yes": 0.9771643090067047, "yes": 0.016648702392118915}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.964835801506731, "res": {"Yes": 0.964835801506731, "yes": 0.03283253613234895}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9743312634023652, "res": {"Yes": 0.9743312634023652, "yes": 0.01768520260626275}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9775242570833274, "res": {"Yes": 0.9775242570833274, "yes": 0.018885508444256932}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.973777617691645, "res": {"Yes": 0.973777617691645, "yes": 0.019768500340259787}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7965766266925857, "res": {"Yes": 0.7965766266925857, "yes": 0.19365617230895396}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7308591420017165, "res": {"Yes": 0.7308591420017165, "yes": 0.25798413195560854}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5066910347830016, "res": {"Yes": 0.5066910347830016, "yes": 0.4826633192423541}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8160246550111742, "res": {"Yes": 0.8160246550111742, "yes": 0.17494192802268982}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8077240750962159, "res": {"Yes": 0.8077240750962159, "yes": 0.18384992117154517}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9564677493741146, "res": {"Yes": 0.9564677493741146, "yes": 0.03878529496257094}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8899169902391726, "res": {"Yes": 0.8899169902391726, "yes": 0.10074184745401496}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9492730716784373, "res": {"Yes": 0.9492730716784373, "yes": 0.04541851216984966}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9209191514701985, "res": {"Yes": 0.9209191514701985, "yes": 0.07258915125984416}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9238925056357898, "res": {"Yes": 0.9238925056357898, "yes": 0.06782966790677097}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9158197473877255, "res": {"Yes": 0.9158197473877255, "yes": 0.07641899408736218}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8571751547461303, "res": {"Yes": 0.8571751547461303, "yes": 0.13618665104753563}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8462340015881982, "res": {"Yes": 0.8462340015881982, "yes": 0.14591195312146446}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8339786220379034, "res": {"Yes": 0.8339786220379034, "yes": 0.1582143214894129}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8703657703482326, "res": {"Yes": 0.8703657703482326, "yes": 0.1232227728996194}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9531734287360518, "res": {"Yes": 0.9531734287360518, "yes": 0.04325566809376504}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9413147177443413, "res": {"Yes": 0.9413147177443413, "yes": 0.05532334542245774}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9182825381062563, "res": {"Yes": 0.9182825381062563, "yes": 0.07772294210531919}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8999148907165141, "res": {"Yes": 0.8999148907165141, "yes": 0.09017683527763769}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9441526314956357, "res": {"Yes": 0.9441526314956357, "yes": 0.05169260729768303}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7201084451859615, "res": {"Yes": 0.7201084451859615, "yes": 0.129284652122286}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5280544159895415, "res": {"Yes": 0.5280544159895415, "yes": 0.2376915628969605}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7125069019064525, "res": {"Yes": 0.7125069019064525, "yes": 0.20361459748392663}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6436887031149309, "res": {"Yes": 0.6436887031149309, "yes": 0.1756387061155791}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5490036988849215, "res": {"Yes": 0.5490036988849215, "yes": 0.33697730335256804}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8229602404355041, "res": {"Yes": 0.8229602404355041, "yes": 0.17272505952230244}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9584316915734843, "res": {"Yes": 0.9584316915734843, "yes": 0.03699945855846442}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765097783932252, "res": {"Yes": 0.9765097783932252, "yes": 0.018313312950295528}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9718723405790178, "res": {"Yes": 0.9718723405790178, "yes": 0.023967421698619915}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6152557155212232, "res": {"Yes": 0.6152557155212232, "yes": 0.37408613522358086}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7410540458322201, "res": {"Yes": 0.7410540458322201, "yes": 0.22745117022555525}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.871285414337812, "res": {"Yes": 0.871285414337812, "yes": 0.12459911193271131}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9034873772443698, "res": {"Yes": 0.9034873772443698, "yes": 0.093146821956417}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7768240516578429, "res": {"Yes": 0.7768240516578429, "yes": 0.200505540339766}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7216770278635676, "res": {"Yes": 0.7216770278635676, "yes": 0.25900342329881654}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8610881490033743, "res": {"Yes": 0.8610881490033743, "yes": 0.06502100548397112}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7475076273175919, "res": {"Yes": 0.7475076273175919, "yes": 0.19539516183347128}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.592814917999859, "res": {"Yes": 0.592814917999859, "yes": 0.21143283414202052}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7314768180511494, "res": {"Yes": 0.7314768180511494, "yes": 0.2100566337708362}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8542594548799922, "res": {"Yes": 0.8542594548799922, "yes": 0.10806860738297237}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7028957930157048, "res": {"Yes": 0.7028957930157048, "yes": 0.2840061176590339}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8176152234716596, "res": {"Yes": 0.8176152234716596, "yes": 0.17060345364848722}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5859571294596392, "res": {"Yes": 0.5859571294596392, "yes": 0.3971009096955642}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7166363856518626, "res": {"Yes": 0.7166363856518626, "yes": 0.2731404628994521}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7260126064876797, "res": {"Yes": 0.7260126064876797, "yes": 0.2569894616964056}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9807775691645263, "res": {"Yes": 0.9807775691645263, "yes": 0.015320783096753379}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6185618327725957, "res": {"Yes": 0.6185618327725957, "yes": 0.36508028159852834}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5804409532917829, "res": {"Yes": 0.5804409532917829, "yes": 0.403548257963864}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.667215230973812, "res": {"Yes": 0.667215230973812, "yes": 0.32391378314674574}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.46706382519440104, "res": {"yes": 0.5183279287189764, "Yes": 0.46706382519440104}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8001369647746231, "res": {"Yes": 0.8001369647746231, "yes": 0.1842944000017424}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9496903783846884, "res": {"Yes": 0.9496903783846884, "yes": 0.04341525074273367}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9414134809587341, "res": {"Yes": 0.9414134809587341, "yes": 0.05218547914974241}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9640989446116863, "res": {"Yes": 0.9640989446116863, "yes": 0.030911468965830486}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8081546178843786, "res": {"Yes": 0.8081546178843786, "yes": 0.17686752021781627}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9844598213103548, "res": {"Yes": 0.9844598213103548, "yes": 0.014458193513008508}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9046310300656641, "res": {"Yes": 0.9046310300656641, "yes": 0.09196260058162577}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770292156085633, "res": {"Yes": 0.9770292156085633, "yes": 0.02115854364947096}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785151591822908, "res": {"Yes": 0.9785151591822908, "yes": 0.019929943095096898}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.988219774643606, "res": {"Yes": 0.988219774643606, "yes": 0.010600420593703704}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5759074547682399, "res": {"Yes": 0.5759074547682399, "yes": 0.41852512709781203}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7766131337252936, "res": {"Yes": 0.7766131337252936, "yes": 0.2141294586295122}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7923235483069064, "res": {"Yes": 0.7923235483069064, "yes": 0.20326014863850608}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8082375468843193, "res": {"Yes": 0.8082375468843193, "yes": 0.18804016412631164}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7150958566709638, "res": {"Yes": 0.7150958566709638, "yes": 0.2793942813438261}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7797796493833045, "res": {"Yes": 0.7797796493833045, "yes": 0.21430509536508316}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8374478884207727, "res": {"Yes": 0.8374478884207727, "yes": 0.15827617955705472}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5521797785224777, "res": {"Yes": 0.5521797785224777, "yes": 0.44119816531896544}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6927560929286457, "res": {"Yes": 0.6927560929286457, "yes": 0.3013421044480828}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.797672521355587, "res": {"Yes": 0.797672521355587, "yes": 0.19533791943747464}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9881921829444739, "res": {"Yes": 0.9881921829444739, "yes": 0.010340083856725205}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4202299363543626, "res": {"yes": 0.4662375550323333, "Yes": 0.4202299363543626}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5891745614819112, "res": {"Yes": 0.5891745614819112, "yes": 0.2717845016625354}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6859806696541425, "res": {"Yes": 0.6859806696541425, "yes": 0.2895987021519869}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9618903616405784, "res": {"Yes": 0.9618903616405784, "yes": 0.03292354248705199}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.3481339707052218, "res": {"yes": 0.5809494099603296, "Yes": 0.3481339707052218}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6738786020132206, "res": {"Yes": 0.6738786020132206, "yes": 0.28138790971227584}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7282278783895876, "res": {"Yes": 0.7282278783895876, "yes": 0.22425788881495043}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8958020784455772, "res": {"Yes": 0.8958020784455772, "yes": 0.10098851312948705}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7276194725378166, "res": {"Yes": 0.7276194725378166, "yes": 0.20163472713610403}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9032353303946756, "res": {"Yes": 0.9032353303946756, "yes": 0.09013263241251894}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7848918039987618, "res": {"Yes": 0.7848918039987618, "yes": 0.20611069503025362}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6975778071768461, "res": {"Yes": 0.6975778071768461, "yes": 0.29751027999155105}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8820365270005956, "res": {"Yes": 0.8820365270005956, "yes": 0.11097204014335212}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8132340390101496, "res": {"Yes": 0.8132340390101496, "yes": 0.17930059139890456}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.928810591722637, "res": {"Yes": 0.928810591722637, "yes": 0.06091750543526469}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9325647433577928, "res": {"Yes": 0.9325647433577928, "yes": 0.052965569955918855}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.848477425370524, "res": {"Yes": 0.848477425370524, "yes": 0.13468115619091278}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8744517766104466, "res": {"Yes": 0.8744517766104466, "yes": 0.10495317297145475}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9119280906390218, "res": {"Yes": 0.9119280906390218, "yes": 0.07667895415363614}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9657515330672889, "res": {"Yes": 0.9657515330672889, "yes": 0.02439916667329525}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.96769134598976, "res": {"Yes": 0.96769134598976, "yes": 0.022682481330506706}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9575705616627694, "res": {"Yes": 0.9575705616627694, "yes": 0.032838554887824105}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9677711644636459, "res": {"Yes": 0.9677711644636459, "yes": 0.02409544886728165}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.781166284119668, "res": {"Yes": 0.781166284119668, "yes": 0.209418591471538}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5348931348405502, "res": {"Yes": 0.5348931348405502, "yes": 0.400637456148586}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6319183223342322, "res": {"Yes": 0.6319183223342322, "yes": 0.320472823592423}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9763114518225875, "res": {"Yes": 0.9763114518225875, "yes": 0.01965625442425722}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7922481226954237, "res": {"Yes": 0.7922481226954237, "yes": 0.20254655856972395}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9451609911432812, "res": {"Yes": 0.9451609911432812, "yes": 0.05338343965142321}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9569358262381812, "res": {"Yes": 0.9569358262381812, "yes": 0.039455556201415956}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962039353394238, "res": {"Yes": 0.9962039353394238, "yes": 0.0028630702957187903}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685332008798934, "res": {"Yes": 0.9685332008798934, "yes": 0.03045979078666909}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.732922142746176, "res": {"Yes": 0.732922142746176, "\u064a": 0.1499359764001758}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8794911477051292, "res": {"Yes": 0.8794911477051292, "yes": 0.07131467872713014}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7794398330946735, "res": {"Yes": 0.7794398330946735, "yes": 0.21709815511329095}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.721957692104114, "res": {"Yes": 0.721957692104114, "yes": 0.272113559949663}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6227684242315525, "res": {"Yes": 0.6227684242315525, "yes": 0.374216489539496}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5837087711929462, "res": {"Yes": 0.5837087711929462, "yes": 0.41329380776444513}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7106942996042597, "res": {"Yes": 0.7106942996042597, "yes": 0.2853316759819067}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5484968183070964, "res": {"Yes": 0.5484968183070964, "yes": 0.4313671221135923}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5592690028615476, "res": {"Yes": 0.5592690028615476, "yes": 0.43334786073044307}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6314492024096677, "res": {"Yes": 0.6314492024096677, "yes": 0.2754133827513095}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37227051430159736, "res": {"yes": 0.5647058233877257, "Yes": 0.37227051430159736}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4115506829939186, "res": {"yes": 0.5307618243918618, "Yes": 0.4115506829939186}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9273949521985153, "res": {"Yes": 0.9273949521985153, "yes": 0.06779051436123798}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9751144658533525, "res": {"Yes": 0.9751144658533525, "yes": 0.018823994871535087}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9774264147347106, "res": {"Yes": 0.9774264147347106, "yes": 0.015287170985489278}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9756421728218349, "res": {"Yes": 0.9756421728218349, "yes": 0.020448269931474486}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9735030004970048, "res": {"Yes": 0.9735030004970048, "yes": 0.018673101744696198}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5083497622249181, "res": {"Yes": 0.5083497622249181, "yes": 0.46897118577649155}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7935728379267464, "res": {"Yes": 0.7935728379267464, "yes": 0.1657324231306351}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5963343972773467, "res": {"Yes": 0.5963343972773467, "yes": 0.3558811329833539}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7565734413723246, "res": {"Yes": 0.7565734413723246, "yes": 0.20304702023262236}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7343441434222105, "res": {"Yes": 0.7343441434222105, "yes": 0.2356807839361029}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8735868559058546, "res": {"Yes": 0.8735868559058546, "yes": 0.11545127494235917}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9099107692939062, "res": {"Yes": 0.9099107692939062, "yes": 0.07927611449999786}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9221082218832508, "res": {"Yes": 0.9221082218832508, "yes": 0.07294042258652507}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.935051033406057, "res": {"Yes": 0.935051033406057, "yes": 0.0563163543628915}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.940462042412694, "res": {"Yes": 0.940462042412694, "yes": 0.05529310841555424}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8652460773691896, "res": {"Yes": 0.8652460773691896, "yes": 0.12860354518803546}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6604784383424914, "res": {"Yes": 0.6604784383424914, "yes": 0.3290987240925819}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9705576555976045, "res": {"Yes": 0.9705576555976045, "yes": 0.02250711044314805}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.950512854699921, "res": {"Yes": 0.950512854699921, "yes": 0.040603297356293935}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7901122381539301, "res": {"Yes": 0.7901122381539301, "yes": 0.20262850522525913}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8945922509935011, "res": {"Yes": 0.8945922509935011, "yes": 0.08222414378788735}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8480921853156466, "res": {"Yes": 0.8480921853156466, "yes": 0.14095105169135663}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8952835135085135, "res": {"Yes": 0.8952835135085135, "yes": 0.10362898606686462}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8762836842523796, "res": {"Yes": 0.8762836842523796, "yes": 0.1186539981096682}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936934152139343, "res": {"Yes": 0.9936934152139343, "yes": 0.005662617714253054}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.43298954360044367, "res": {"yes": 0.4856255137236436, "Yes": 0.43298954360044367}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7840637563995322, "res": {"Yes": 0.7840637563995322, "yes": 0.14973940436235766}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6303253023023196, "res": {"Yes": 0.6303253023023196, "yes": 0.2938018554240641}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9358603241269521, "res": {"Yes": 0.9358603241269521, "yes": 0.056014622850675475}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6662892256254263, "res": {"Yes": 0.6662892256254263, "yes": 0.28207970673076577}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9383755659237835, "res": {"Yes": 0.9383755659237835, "yes": 0.05600792950318031}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.939301373240384, "res": {"Yes": 0.939301373240384, "yes": 0.056428473419920806}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9090872816475324, "res": {"Yes": 0.9090872816475324, "yes": 0.0864809258296702}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9200110482902659, "res": {"Yes": 0.9200110482902659, "yes": 0.07497066314760638}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9081101201828294, "res": {"Yes": 0.9081101201828294, "yes": 0.08510910988114666}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8044891252847324, "res": {"Yes": 0.8044891252847324, "yes": 0.15267735119370487}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8276315761168156, "res": {"Yes": 0.8276315761168156, "yes": 0.1428343373998353}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7837050273915296, "res": {"Yes": 0.7837050273915296, "yes": 0.14340875495434874}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8946423406158802, "res": {"Yes": 0.8946423406158802, "yes": 0.06903920826810855}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8346348125803076, "res": {"Yes": 0.8346348125803076, "yes": 0.13007041777334777}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7157699804429262, "res": {"Yes": 0.7157699804429262, "yes": 0.20756861900055662}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7384925389800933, "res": {"Yes": 0.7384925389800933, "yes": 0.2577074892119705}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42604074148371684, "res": {"yes": 0.5493731379200492, "Yes": 0.42604074148371684}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6772440185658816, "res": {"Yes": 0.6772440185658816, "yes": 0.2697988763415882}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.492930987265981, "res": {"Yes": 0.492930987265981, "yes": 0.22088654065408775}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.406142701340984, "res": {"yes": 0.4394185338573643, "Yes": 0.406142701340984}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7639960958291065, "res": {"Yes": 0.7639960958291065, "yes": 0.22033325132496476}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4160451046329326, "res": {"yes": 0.437618698117285, "Yes": 0.4160451046329326}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.367542875171865, "res": {"yes": 0.4916410509481702, "Yes": 0.367542875171865}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7402475808672759, "res": {"Yes": 0.7402475808672759, "yes": 0.15138186911570595}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8737629975072313, "res": {"Yes": 0.8737629975072313, "yes": 0.12320101367932101}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8801473256596236, "res": {"Yes": 0.8801473256596236, "yes": 0.11427625186255061}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9131970908475905, "res": {"Yes": 0.9131970908475905, "yes": 0.08259813313349702}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8675411353625274, "res": {"Yes": 0.8675411353625274, "yes": 0.12907718760957296}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9175767970953825, "res": {"Yes": 0.9175767970953825, "yes": 0.0798368181982003}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.951828750785927, "res": {"Yes": 0.951828750785927, "yes": 0.0312305061701716}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6289673383132179, "res": {"Yes": 0.6289673383132179, "yes": 0.31443132159355375}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40263048835195964, "res": {"yes": 0.5568299677612257, "Yes": 0.40263048835195964}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3377041373874954, "res": {"yes": 0.5326656766128517, "Yes": 0.3377041373874954}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5772434544536239, "res": {"Yes": 0.5772434544536239, "yes": 0.3753705903751462}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7905352510248853, "res": {"Yes": 0.7905352510248853, "yes": 0.20326588065551876}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8936353258028094, "res": {"Yes": 0.8936353258028094, "yes": 0.101367799239408}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6525870883211659, "res": {"Yes": 0.6525870883211659, "yes": 0.3431154498381379}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8159243309473887, "res": {"Yes": 0.8159243309473887, "yes": 0.1777390883155453}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6582145545905795, "res": {"Yes": 0.6582145545905795, "yes": 0.33368524852517867}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8791290917684427, "res": {"Yes": 0.8791290917684427, "yes": 0.11855799841991368}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6884593940275102, "res": {"Yes": 0.6884593940275102, "yes": 0.3028128260980641}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9016753915688605, "res": {"Yes": 0.9016753915688605, "yes": 0.09217864419793424}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8057287996158302, "res": {"Yes": 0.8057287996158302, "yes": 0.18591592938539495}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9098848099099707, "res": {"Yes": 0.9098848099099707, "yes": 0.08720862869834957}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8330441058647624, "res": {"Yes": 0.8330441058647624, "yes": 0.16369855856319257}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9813721397511238, "res": {"Yes": 0.9813721397511238, "yes": 0.013846233954048924}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9627036026209635, "res": {"Yes": 0.9627036026209635, "yes": 0.03469590536502206}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.867729325443264, "res": {"Yes": 0.867729325443264, "yes": 0.13074866322368506}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9760737889445954, "res": {"Yes": 0.9760737889445954, "yes": 0.021849621068361583}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.34478653244143, "res": {"yes": 0.5911839251051677, "Yes": 0.34478653244143}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6961205010089963, "res": {"Yes": 0.6961205010089963, "yes": 0.28795104382912373}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37960740692098244, "res": {"yes": 0.5053915012792304, "Yes": 0.37960740692098244}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34110422610425556, "res": {"yes": 0.5377910914960413, "Yes": 0.34110422610425556}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4396917173591365, "res": {"Yes": 0.4396917173591365, "yes": 0.41883836383107065}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6098681089674686, "res": {"Yes": 0.6098681089674686, "yes": 0.3713089736122426}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6903838442856282, "res": {"Yes": 0.6903838442856282, "yes": 0.2952559121170441}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9705527125600519, "res": {"Yes": 0.9705527125600519, "yes": 0.02277449521676159}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.602186819753397, "res": {"Yes": 0.602186819753397, "yes": 0.3794034986461074}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5201441990413886, "res": {"Yes": 0.5201441990413886, "yes": 0.470481942536154}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.987513047140465, "res": {"Yes": 0.987513047140465, "yes": 0.007769301621849525}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9843017996435691, "res": {"Yes": 0.9843017996435691, "yes": 0.012382550687890797}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9773278003167762, "res": {"Yes": 0.9773278003167762, "yes": 0.017803468163802717}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9723932705681455, "res": {"Yes": 0.9723932705681455, "yes": 0.020454110791402014}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6695396448469346, "res": {"Yes": 0.6695396448469346, "yes": 0.32746693199905386}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9872640983420213, "res": {"Yes": 0.9872640983420213, "yes": 0.009083424589872428}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9812472474852759, "res": {"Yes": 0.9812472474852759, "yes": 0.014936356567541307}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6663467755110996, "res": {"Yes": 0.6663467755110996, "yes": 0.330082716457085}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.722762755266479, "res": {"Yes": 0.722762755266479, "yes": 0.27126179261341055}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9725003583561395, "res": {"Yes": 0.9725003583561395, "yes": 0.024983129547653776}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5688631974710487, "res": {"Yes": 0.5688631974710487, "yes": 0.3628315507287087}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7348936162801555, "res": {"Yes": 0.7348936162801555, "yes": 0.25808708816265624}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5657323796899467, "res": {"Yes": 0.5657323796899467, "yes": 0.42998076665680157}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9741128905505032, "res": {"Yes": 0.9741128905505032, "yes": 0.021618211381580744}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.2901306326538609, "res": {"yes": 0.6884575214205052, "Yes": 0.2901306326538609}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8543635954547811, "res": {"Yes": 0.8543635954547811, "yes": 0.13678775336318205}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7117551622161953, "res": {"Yes": 0.7117551622161953, "yes": 0.2755967038003414}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8033714212058768, "res": {"Yes": 0.8033714212058768, "yes": 0.17367698727074402}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7653890824050201, "res": {"Yes": 0.7653890824050201, "yes": 0.21257068726056214}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8497986414270376, "res": {"Yes": 0.8497986414270376, "yes": 0.12373241359449175}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9542569482600208, "res": {"Yes": 0.9542569482600208, "yes": 0.04135492332121263}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8127470901961328, "res": {"Yes": 0.8127470901961328, "yes": 0.1509398750406618}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9210403938521954, "res": {"Yes": 0.9210403938521954, "yes": 0.07154121942051411}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8738291788229173, "res": {"Yes": 0.8738291788229173, "yes": 0.11378218683388841}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8944761583445822, "res": {"Yes": 0.8944761583445822, "yes": 0.07789088047745277}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9093905310047593, "res": {"Yes": 0.9093905310047593, "yes": 0.08340353370095441}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9048088980100211, "res": {"Yes": 0.9048088980100211, "yes": 0.08475676609322792}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9160276584036953, "res": {"Yes": 0.9160276584036953, "yes": 0.07252915210073851}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9057635946912179, "res": {"Yes": 0.9057635946912179, "yes": 0.09189053934712083}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9202890164114249, "res": {"Yes": 0.9202890164114249, "yes": 0.07066805125069257}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7066994426155614, "res": {"Yes": 0.7066994426155614, "yes": 0.2876113469455314}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7032014012055706, "res": {"Yes": 0.7032014012055706, "yes": 0.29111010956345845}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6912439350997471, "res": {"Yes": 0.6912439350997471, "yes": 0.29955923540432255}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6798182336498664, "res": {"Yes": 0.6798182336498664, "yes": 0.3075716225485122}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8010351822255657, "res": {"Yes": 0.8010351822255657, "yes": 0.19407102086098094}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5484759374306934, "res": {"Yes": 0.5484759374306934, "yes": 0.2758757212499875}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9825075380034598, "res": {"Yes": 0.9825075380034598, "yes": 0.014632897962571504}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7559705338130271, "res": {"Yes": 0.7559705338130271, "yes": 0.17831812584962342}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902512462107141, "res": {"Yes": 0.9902512462107141, "yes": 0.007852128794019603}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8237545579576248, "res": {"Yes": 0.8237545579576248, "yes": 0.15132962111301132}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8671500405700839, "res": {"Yes": 0.8671500405700839, "yes": 0.12898798738538828}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8931155510536395, "res": {"Yes": 0.8931155510536395, "yes": 0.10181355810410905}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8286759689824991, "res": {"Yes": 0.8286759689824991, "yes": 0.1675102894586259}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9730428399887047, "res": {"Yes": 0.9730428399887047, "yes": 0.02243631786060316}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9788280179770489, "res": {"Yes": 0.9788280179770489, "yes": 0.014513638288430837}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8264530740846535, "res": {"Yes": 0.8264530740846535, "yes": 0.16687862954195543}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8645408173444902, "res": {"Yes": 0.8645408173444902, "yes": 0.1299955837834652}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.87281796630446, "res": {"Yes": 0.87281796630446, "yes": 0.12088519323936651}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.921497909922657, "res": {"Yes": 0.921497909922657, "yes": 0.07264855347776958}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6398508478648576, "res": {"Yes": 0.6398508478648576, "yes": 0.35341378961774506}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9132524779307467, "res": {"Yes": 0.9132524779307467, "yes": 0.08359814066400476}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9595750737700701, "res": {"Yes": 0.9595750737700701, "yes": 0.037969009175846}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8440569418626882, "res": {"Yes": 0.8440569418626882, "yes": 0.1488604930326349}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9209934634346065, "res": {"Yes": 0.9209934634346065, "yes": 0.07408132520261632}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9123160708814442, "res": {"Yes": 0.9123160708814442, "yes": 0.08239271138866949}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5414712160010501, "res": {"Yes": 0.5414712160010501, "yes": 0.4313783205494374}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6425191225062623, "res": {"Yes": 0.6425191225062623, "yes": 0.35302090600228264}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4596710748253791, "res": {"yes": 0.5336581572346114, "Yes": 0.4596710748253791}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7097259069721605, "res": {"Yes": 0.7097259069721605, "yes": 0.26393933339224995}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.43533301023509147, "res": {"yes": 0.5574880003028271, "Yes": 0.43533301023509147}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.871931058109272, "res": {"Yes": 0.871931058109272, "yes": 0.12331728402581203}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846989259177722, "res": {"Yes": 0.9846989259177722, "yes": 0.011122040597784955}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7241407133682443, "res": {"Yes": 0.7241407133682443, "yes": 0.2675013676363214}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.820528429547376, "res": {"Yes": 0.820528429547376, "yes": 0.17401800641119733}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6665636133712501, "res": {"Yes": 0.6665636133712501, "yes": 0.3250604545340261}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7635263826817693, "res": {"Yes": 0.7635263826817693, "yes": 0.2273380186776909}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6054181516137548, "res": {"Yes": 0.6054181516137548, "yes": 0.381521959107242}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6986420196849887, "res": {"Yes": 0.6986420196849887, "yes": 0.29596895852656174}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6909955466675473, "res": {"Yes": 0.6909955466675473, "yes": 0.2994038943201114}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7123874672607086, "res": {"Yes": 0.7123874672607086, "yes": 0.2755205669498716}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6936699832729322, "res": {"Yes": 0.6936699832729322, "yes": 0.2625172197642712}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.44498380087792566, "res": {"yes": 0.5133184326341216, "Yes": 0.44498380087792566}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6017407646038028, "res": {"Yes": 0.6017407646038028, "yes": 0.3148432446657906}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5929718568785979, "res": {"Yes": 0.5929718568785979, "yes": 0.38403328133761944}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4717164101114207, "res": {"yes": 0.48309077646351084, "Yes": 0.4717164101114207}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9245253391865068, "res": {"Yes": 0.9245253391865068, "yes": 0.0668013158687213}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.916871167548344, "res": {"Yes": 0.916871167548344, "yes": 0.07362247006835795}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8747952937457073, "res": {"Yes": 0.8747952937457073, "yes": 0.11463244495274766}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9359196314748255, "res": {"Yes": 0.9359196314748255, "yes": 0.05536460435382599}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9527036062329859, "res": {"Yes": 0.9527036062329859, "yes": 0.03332964625933793}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9351418219181473, "res": {"Yes": 0.9351418219181473, "yes": 0.059887528552743645}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9868819816444782, "res": {"Yes": 0.9868819816444782, "yes": 0.009387149495053697}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898625934314877, "res": {"Yes": 0.9898625934314877, "yes": 0.008314527441522944}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9796482072082066, "res": {"Yes": 0.9796482072082066, "yes": 0.01847874363867246}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9340332003236388, "res": {"Yes": 0.9340332003236388, "yes": 0.05665783475566269}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.530184266615892, "res": {"Yes": 0.530184266615892, "yes": 0.46381649031288325}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6222445477041398, "res": {"Yes": 0.6222445477041398, "yes": 0.3689269064737917}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.637484573929483, "res": {"Yes": 0.637484573929483, "yes": 0.34839147615119287}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5489431745635839, "res": {"Yes": 0.5489431745635839, "yes": 0.4347603148911957}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.40113437823247156, "res": {"yes": 0.5863931323636609, "Yes": 0.40113437823247156}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8700667727424338, "res": {"Yes": 0.8700667727424338, "yes": 0.11793076075476266}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8498257589343431, "res": {"Yes": 0.8498257589343431, "yes": 0.13977565583554294}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8860912036659419, "res": {"Yes": 0.8860912036659419, "yes": 0.10667306385651436}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939665888673432, "res": {"Yes": 0.9939665888673432, "yes": 0.0037945479214996068}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7894718037371583, "res": {"Yes": 0.7894718037371583, "yes": 0.1990062395950897}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9010452415836046, "res": {"Yes": 0.9010452415836046, "yes": 0.08370809430579682}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.933515557840979, "res": {"Yes": 0.933515557840979, "yes": 0.06014577380264647}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8967056501461429, "res": {"Yes": 0.8967056501461429, "yes": 0.09073903859177418}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9536820728947706, "res": {"Yes": 0.9536820728947706, "yes": 0.02371228525395672}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9610223339109017, "res": {"Yes": 0.9610223339109017, "yes": 0.02874406407536033}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9127498616694537, "res": {"Yes": 0.9127498616694537, "yes": 0.07756265636750032}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9314996636731308, "res": {"Yes": 0.9314996636731308, "yes": 0.06048348054941161}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9441247888450748, "res": {"Yes": 0.9441247888450748, "yes": 0.04786195256040994}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.947647655715643, "res": {"Yes": 0.947647655715643, "yes": 0.04192101610990984}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.929031651725536, "res": {"Yes": 0.929031651725536, "yes": 0.06115753517453433}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7526329878001754, "res": {"Yes": 0.7526329878001754, "yes": 0.2393906123185346}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8693050496889315, "res": {"Yes": 0.8693050496889315, "yes": 0.12462739914159679}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8114294123304927, "res": {"Yes": 0.8114294123304927, "yes": 0.1839711733909756}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8142808808749751, "res": {"Yes": 0.8142808808749751, "yes": 0.17765736490256823}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8321388542154383, "res": {"Yes": 0.8321388542154383, "yes": 0.16394881557889643}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9920950051127765, "res": {"Yes": 0.9920950051127765, "yes": 0.005930243526217227}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914956711425468, "res": {"Yes": 0.9914956711425468, "yes": 0.0072108171389122224}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9836650462810893, "res": {"Yes": 0.9836650462810893, "yes": 0.012744557006207498}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981693143127917, "res": {"Yes": 0.981693143127917, "yes": 0.012982351769852324}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.985273257009134, "res": {"Yes": 0.985273257009134, "yes": 0.011552024222624814}, "ground_truth": 0}]