[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9418747528533296, "res": {"Yes": 0.9418747528533296, "yes": 0.051322737442530776}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9069906079429975, "res": {"Yes": 0.9069906079429975, "yes": 0.08096773799571137}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9323268577864411, "res": {"Yes": 0.9323268577864411, "yes": 0.06362487937420536}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9092673169357499, "res": {"Yes": 0.9092673169357499, "yes": 0.08375824209873423}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.851298081250389, "res": {"Yes": 0.851298081250389, "yes": 0.12508637253710098}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8366828525786577, "res": {"Yes": 0.8366828525786577, "yes": 0.15700398371105156}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9412940109309353, "res": {"Yes": 0.9412940109309353, "yes": 0.04770412055585653}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9314745321512307, "res": {"Yes": 0.9314745321512307, "yes": 0.060033382288635764}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8720963134223668, "res": {"Yes": 0.8720963134223668, "yes": 0.12169879261890862}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9501712285974941, "res": {"Yes": 0.9501712285974941, "yes": 0.041288985892237826}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5106923984548581, "res": {"Yes": 0.5106923984548581, "yes": 0.4720349733788188}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6845050340051227, "res": {"Yes": 0.6845050340051227, "yes": 0.3045664162050531}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5586348719565724, "res": {"Yes": 0.5586348719565724, "yes": 0.4346460140417895}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7875607329891315, "res": {"Yes": 0.7875607329891315, "yes": 0.20517452921960086}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5574080288107959, "res": {"Yes": 0.5574080288107959, "yes": 0.4357481936378535}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8606678941879178, "res": {"Yes": 0.8606678941879178, "yes": 0.13431896120172718}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8985192395695186, "res": {"Yes": 0.8985192395695186, "yes": 0.09822467718776255}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7900701183932263, "res": {"Yes": 0.7900701183932263, "yes": 0.20542998678627392}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8334075172967225, "res": {"Yes": 0.8334075172967225, "yes": 0.16111137225692473}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8783528384125893, "res": {"Yes": 0.8783528384125893, "yes": 0.11735532272770677}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.956028796043463, "res": {"Yes": 0.956028796043463, "yes": 0.03855997722029311}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.885939123593554, "res": {"Yes": 0.885939123593554, "yes": 0.10346900290220964}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9126130324671655, "res": {"Yes": 0.9126130324671655, "yes": 0.08257641268084764}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9123934659364019, "res": {"Yes": 0.9123934659364019, "yes": 0.08259577086067005}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9204871483531543, "res": {"Yes": 0.9204871483531543, "yes": 0.07108284863752957}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8900817558226313, "res": {"Yes": 0.8900817558226313, "yes": 0.10469653041525052}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8434158709719329, "res": {"Yes": 0.8434158709719329, "yes": 0.14980886960584566}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9040069053116977, "res": {"Yes": 0.9040069053116977, "yes": 0.09115686183497683}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8893961674996455, "res": {"Yes": 0.8893961674996455, "yes": 0.10518501834386007}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9115658854765747, "res": {"Yes": 0.9115658854765747, "yes": 0.0837994361531025}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8979980070527953, "res": {"Yes": 0.8979980070527953, "yes": 0.0962441968595705}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8819179275230244, "res": {"Yes": 0.8819179275230244, "yes": 0.10829235699451914}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8257556933274317, "res": {"Yes": 0.8257556933274317, "yes": 0.1639317166091583}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8610963810454278, "res": {"Yes": 0.8610963810454278, "yes": 0.13050723671527434}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8742941971111811, "res": {"Yes": 0.8742941971111811, "yes": 0.11820622623197734}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8951766015542499, "res": {"Yes": 0.8951766015542499, "yes": 0.09572638626299523}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9312905747836933, "res": {"Yes": 0.9312905747836933, "yes": 0.058049005660675765}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8343698247162706, "res": {"Yes": 0.8343698247162706, "yes": 0.1535612298438602}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8363382604248852, "res": {"Yes": 0.8363382604248852, "yes": 0.1558708966418201}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8935566893541932, "res": {"Yes": 0.8935566893541932, "yes": 0.09095834722996117}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9596384116534931, "res": {"Yes": 0.9596384116534931, "yes": 0.03650097888449572}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8651245620912332, "res": {"Yes": 0.8651245620912332, "yes": 0.1307552400468509}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6239022334820186, "res": {"Yes": 0.6239022334820186, "yes": 0.3720910115231243}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9775511354598223, "res": {"Yes": 0.9775511354598223, "yes": 0.016839330962918845}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9034973969749427, "res": {"Yes": 0.9034973969749427, "yes": 0.09229075546177427}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6514623133898803, "res": {"Yes": 0.6514623133898803, "yes": 0.3458709328365701}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7141767273307075, "res": {"Yes": 0.7141767273307075, "yes": 0.2824797252493531}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5946186876310857, "res": {"Yes": 0.5946186876310857, "yes": 0.4024715370611476}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7331852357124162, "res": {"Yes": 0.7331852357124162, "yes": 0.2627291829385757}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7540354621884365, "res": {"Yes": 0.7540354621884365, "yes": 0.24136902320259937}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8870443889612204, "res": {"Yes": 0.8870443889612204, "yes": 0.10209765722103492}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8504598804341453, "res": {"Yes": 0.8504598804341453, "yes": 0.14029089368542327}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8346911356377791, "res": {"Yes": 0.8346911356377791, "yes": 0.1610746108364133}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8474072205479356, "res": {"Yes": 0.8474072205479356, "yes": 0.14354034980456576}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8861623419232236, "res": {"Yes": 0.8861623419232236, "yes": 0.10644647524791669}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7943017199338472, "res": {"Yes": 0.7943017199338472, "yes": 0.19343214497034136}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8922967253328516, "res": {"Yes": 0.8922967253328516, "yes": 0.09785996018284478}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7927655583182774, "res": {"Yes": 0.7927655583182774, "yes": 0.19313923988808732}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8250927621488006, "res": {"Yes": 0.8250927621488006, "yes": 0.1445324253091049}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9190802658647315, "res": {"Yes": 0.9190802658647315, "yes": 0.07123757934586904}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5442621961627767, "res": {"Yes": 0.5442621961627767, "yes": 0.4337933111112352}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5249256738668248, "res": {"Yes": 0.5249256738668248, "yes": 0.46354535753715276}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.610175132842789, "res": {"Yes": 0.610175132842789, "yes": 0.3773417890481473}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7019631775251115, "res": {"Yes": 0.7019631775251115, "yes": 0.2897598217533313}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7081449674236667, "res": {"Yes": 0.7081449674236667, "yes": 0.28253775254411845}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8152564929317915, "res": {"Yes": 0.8152564929317915, "yes": 0.16987717551995749}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7644005095614548, "res": {"Yes": 0.7644005095614548, "yes": 0.22260114419825885}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7330189168450842, "res": {"Yes": 0.7330189168450842, "yes": 0.2555991198374858}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.740726194550484, "res": {"Yes": 0.740726194550484, "yes": 0.24376154649261533}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7116695289868759, "res": {"Yes": 0.7116695289868759, "yes": 0.27334088492478065}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8883614806656073, "res": {"Yes": 0.8883614806656073, "yes": 0.10735022056769279}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.824455607627178, "res": {"Yes": 0.824455607627178, "yes": 0.15918728131512203}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8619215170235563, "res": {"Yes": 0.8619215170235563, "yes": 0.13101567584514315}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8634857622073121, "res": {"Yes": 0.8634857622073121, "yes": 0.12426826783295157}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8507501153626623, "res": {"Yes": 0.8507501153626623, "yes": 0.13690843969168762}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8575948446951133, "res": {"Yes": 0.8575948446951133, "yes": 0.13547126873525556}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8450058825935701, "res": {"Yes": 0.8450058825935701, "yes": 0.1469515115526981}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8688859023308858, "res": {"Yes": 0.8688859023308858, "yes": 0.1252493911633794}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8234256901895507, "res": {"Yes": 0.8234256901895507, "yes": 0.16918985368648548}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9460132771371964, "res": {"Yes": 0.9460132771371964, "yes": 0.04977218425970701}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6913013107274161, "res": {"Yes": 0.6913013107274161, "yes": 0.28091405795628904}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9347021000803365, "res": {"Yes": 0.9347021000803365, "yes": 0.06117962926619214}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8255453836234083, "res": {"Yes": 0.8255453836234083, "yes": 0.16419315004134188}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8451611666907862, "res": {"Yes": 0.8451611666907862, "yes": 0.14488790565184873}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8559271599306635, "res": {"Yes": 0.8559271599306635, "yes": 0.12844827305611012}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9216142471618577, "res": {"Yes": 0.9216142471618577, "yes": 0.0676036367029046}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9360912349664497, "res": {"Yes": 0.9360912349664497, "yes": 0.05230650693850781}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8798509859467073, "res": {"Yes": 0.8798509859467073, "yes": 0.110216529284937}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8703673021933365, "res": {"Yes": 0.8703673021933365, "yes": 0.09554817580508249}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.994362598123043, "res": {"Yes": 0.994362598123043, " Yes": 0.002815955340526871}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.901677822488993, "res": {"Yes": 0.901677822488993, "yes": 0.09504131378195636}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9213041945661467, "res": {"Yes": 0.9213041945661467, "yes": 0.06585805729260699}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.582562501841341, "res": {"Yes": 0.582562501841341, "yes": 0.40236718972080493}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5941012616442438, "res": {"Yes": 0.5941012616442438, "yes": 0.3996134396562137}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7779690555127295, "res": {"Yes": 0.7779690555127295, "yes": 0.21383888956415426}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7906892781057808, "res": {"Yes": 0.7906892781057808, "yes": 0.20559688423808084}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7538655820575105, "res": {"Yes": 0.7538655820575105, "yes": 0.23503566744338827}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9251053741110294, "res": {"Yes": 0.9251053741110294, "yes": 0.06966627066054422}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9097414415579141, "res": {"Yes": 0.9097414415579141, "yes": 0.08471100979485002}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9087381637304763, "res": {"Yes": 0.9087381637304763, "yes": 0.08677091448918459}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8841214225068103, "res": {"Yes": 0.8841214225068103, "yes": 0.10799190590397759}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9000654149692163, "res": {"Yes": 0.9000654149692163, "yes": 0.09580209746867079}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.861654233516184, "res": {"Yes": 0.861654233516184, "yes": 0.1332397422683424}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8900183887077631, "res": {"Yes": 0.8900183887077631, "yes": 0.10144131754102066}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9425664454440863, "res": {"Yes": 0.9425664454440863, "yes": 0.052385355599870016}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8477732523858653, "res": {"Yes": 0.8477732523858653, "yes": 0.13939910514627865}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7526373606105374, "res": {"Yes": 0.7526373606105374, "yes": 0.23660954166418963}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9195584222311782, "res": {"Yes": 0.9195584222311782, "yes": 0.0742082462158007}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8922794506354688, "res": {"Yes": 0.8922794506354688, "yes": 0.09775702653393473}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8738056032297088, "res": {"Yes": 0.8738056032297088, "yes": 0.11019902828970232}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7537341193638798, "res": {"Yes": 0.7537341193638798, "yes": 0.23282820848434563}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7463225869954649, "res": {"Yes": 0.7463225869954649, "yes": 0.23381665415407693}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7807122616579844, "res": {"Yes": 0.7807122616579844, "yes": 0.20428109845791964}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8321779739824778, "res": {"Yes": 0.8321779739824778, "yes": 0.15581293903324434}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7734381680840205, "res": {"Yes": 0.7734381680840205, "yes": 0.21879122814126717}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7906692897334844, "res": {"Yes": 0.7906692897334844, "yes": 0.2016332552079685}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8879644682699789, "res": {"Yes": 0.8879644682699789, "yes": 0.10666875435178713}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7988191054799292, "res": {"Yes": 0.7988191054799292, "yes": 0.19211006807261252}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8427641034279884, "res": {"Yes": 0.8427641034279884, "yes": 0.1492216044129283}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7668763523982621, "res": {"Yes": 0.7668763523982621, "yes": 0.22339007955060158}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8746641192791644, "res": {"Yes": 0.8746641192791644, "yes": 0.11711549433611096}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7828327479176316, "res": {"Yes": 0.7828327479176316, "yes": 0.19225699237431243}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8019693730995365, "res": {"Yes": 0.8019693730995365, "yes": 0.18261930381625685}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9096378497399382, "res": {"Yes": 0.9096378497399382, "yes": 0.08053854413658655}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5791629543426087, "res": {"Yes": 0.5791629543426087, "yes": 0.41052629340384145}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8578815522626674, "res": {"Yes": 0.8578815522626674, "yes": 0.1331065957450606}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8428927359011479, "res": {"Yes": 0.8428927359011479, "yes": 0.1531377861165298}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5741024352274511, "res": {"Yes": 0.5741024352274511, "yes": 0.41597540627489693}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46526490845851926, "res": {"yes": 0.5300626350918616, "Yes": 0.46526490845851926}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8501164320467461, "res": {"Yes": 0.8501164320467461, "yes": 0.13851179424178453}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9087165996297101, "res": {"Yes": 0.9087165996297101, "yes": 0.0858317752618789}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9767949618599281, "res": {"Yes": 0.9767949618599281, "yes": 0.01880714552954649}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.968929772420508, "res": {"Yes": 0.968929772420508, "yes": 0.0278828715952187}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.911105301034225, "res": {"Yes": 0.911105301034225, "yes": 0.08334172123778003}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9381313817055875, "res": {"Yes": 0.9381313817055875, "yes": 0.059187382100738706}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9263026054281965, "res": {"Yes": 0.9263026054281965, "yes": 0.07117786455374148}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.87140687999053, "res": {"Yes": 0.87140687999053, "yes": 0.12316643600798446}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.866588898644612, "res": {"Yes": 0.866588898644612, "yes": 0.13003885351343208}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8571258428779471, "res": {"Yes": 0.8571258428779471, "yes": 0.13899031841053625}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.964383903020627, "res": {"Yes": 0.964383903020627, "yes": 0.03267743174672627}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7730539266660656, "res": {"Yes": 0.7730539266660656, "yes": 0.2119992589150417}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7196170884762826, "res": {"Yes": 0.7196170884762826, "yes": 0.2711449853155892}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5601951353317854, "res": {"Yes": 0.5601951353317854, "yes": 0.4304083542029935}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8522049495206657, "res": {"Yes": 0.8522049495206657, "yes": 0.14066030215238912}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8270495255165118, "res": {"Yes": 0.8270495255165118, "yes": 0.1615869647216211}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8486061831036519, "res": {"Yes": 0.8486061831036519, "yes": 0.14520114368558146}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8080334118633149, "res": {"Yes": 0.8080334118633149, "yes": 0.18477263680036182}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7080918939473116, "res": {"Yes": 0.7080918939473116, "yes": 0.28461918078701043}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8174323760993618, "res": {"Yes": 0.8174323760993618, "yes": 0.1665287020056178}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9702672460880829, "res": {"Yes": 0.9702672460880829, "yes": 0.02245879459677701}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8205175247970096, "res": {"Yes": 0.8205175247970096, "yes": 0.16283328385926732}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7946136001734492, "res": {"Yes": 0.7946136001734492, "yes": 0.1897958989989433}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7913084166019596, "res": {"Yes": 0.7913084166019596, "yes": 0.189768095936364}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8199717504401669, "res": {"Yes": 0.8199717504401669, "yes": 0.16028833368901665}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8540726316837504, "res": {"Yes": 0.8540726316837504, "yes": 0.12722675654500956}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7903069774086862, "res": {"Yes": 0.7903069774086862, "yes": 0.20434543670534944}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7500451606984402, "res": {"Yes": 0.7500451606984402, "yes": 0.24275183806400683}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.729364603520044, "res": {"Yes": 0.729364603520044, "yes": 0.26418203154842357}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7210849544483129, "res": {"Yes": 0.7210849544483129, "yes": 0.2738595487004665}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7901905421566304, "res": {"Yes": 0.7901905421566304, "yes": 0.20436969394832522}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8426309487921647, "res": {"Yes": 0.8426309487921647, "yes": 0.14963761617929575}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8680936312809575, "res": {"Yes": 0.8680936312809575, "yes": 0.12607466098722012}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.772219262886632, "res": {"Yes": 0.772219262886632, "yes": 0.21292039830504297}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9225688331226203, "res": {"Yes": 0.9225688331226203, "yes": 0.07080589634791382}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9273858174032251, "res": {"Yes": 0.9273858174032251, "yes": 0.06655352951567622}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8677972799874649, "res": {"Yes": 0.8677972799874649, "yes": 0.11021121697625312}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8822150957279803, "res": {"Yes": 0.8822150957279803, "yes": 0.11097470350427574}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7666854239580922, "res": {"Yes": 0.7666854239580922, "yes": 0.22530647330984294}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8778275560913558, "res": {"Yes": 0.8778275560913558, "yes": 0.11634512378454055}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8215147062178363, "res": {"Yes": 0.8215147062178363, "yes": 0.1611284026290116}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8288850039956932, "res": {"Yes": 0.8288850039956932, "yes": 0.1659165048492063}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7484489578745823, "res": {"Yes": 0.7484489578745823, "yes": 0.2299562972661021}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7767162359620551, "res": {"Yes": 0.7767162359620551, "yes": 0.21173758026327244}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8755695675916182, "res": {"Yes": 0.8755695675916182, "yes": 0.10928217285328885}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8645413533599631, "res": {"Yes": 0.8645413533599631, "yes": 0.12010679183457047}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9032481690729062, "res": {"Yes": 0.9032481690729062, "yes": 0.09595637671578754}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8704217627792163, "res": {"Yes": 0.8704217627792163, "yes": 0.12405607473387391}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8765639645890406, "res": {"Yes": 0.8765639645890406, "yes": 0.12026559382805503}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.912245825248478, "res": {"Yes": 0.912245825248478, "yes": 0.08508455944451672}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923195006349845, "res": {"Yes": 0.9923195006349845, "yes": 0.0058993470717468134}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7263106740519764, "res": {"Yes": 0.7263106740519764, "yes": 0.2617342769485953}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.715005774569331, "res": {"Yes": 0.715005774569331, "yes": 0.27950295891082383}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7966981776271714, "res": {"Yes": 0.7966981776271714, "yes": 0.1924700926849333}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5340421396143142, "res": {"Yes": 0.5340421396143142, "yes": 0.45723282679492894}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8216596341942848, "res": {"Yes": 0.8216596341942848, "yes": 0.16640576595890916}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8564411700786858, "res": {"Yes": 0.8564411700786858, "yes": 0.12998327378456856}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9386712618007259, "res": {"Yes": 0.9386712618007259, "yes": 0.05803790194799686}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.923659963445497, "res": {"Yes": 0.923659963445497, "yes": 0.07083476271383099}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9158951462678505, "res": {"Yes": 0.9158951462678505, "yes": 0.07819201082602235}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9124793170764154, "res": {"Yes": 0.9124793170764154, "yes": 0.07763851093197796}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8043368418632667, "res": {"Yes": 0.8043368418632667, "yes": 0.18735960263472962}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8003896559153982, "res": {"Yes": 0.8003896559153982, "yes": 0.19269294799540193}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7461118925592135, "res": {"Yes": 0.7461118925592135, "yes": 0.24082132752825922}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8429731854228524, "res": {"Yes": 0.8429731854228524, "yes": 0.14798033287912507}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8175897224505301, "res": {"Yes": 0.8175897224505301, "yes": 0.17082881598533148}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9122501857939443, "res": {"Yes": 0.9122501857939443, "yes": 0.07101972667772616}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5918119183896972, "res": {"Yes": 0.5918119183896972, "yes": 0.39761521413688444}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4526620311717082, "res": {"yes": 0.5313136787736427, "Yes": 0.4526620311717082}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9748454415863893, "res": {"Yes": 0.9748454415863893, "yes": 0.019469378130305057}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9661725456835618, "res": {"Yes": 0.9661725456835618, "yes": 0.02585064310501557}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6382722771395078, "res": {"Yes": 0.6382722771395078, "yes": 0.35430431083354996}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6164185696602082, "res": {"Yes": 0.6164185696602082, "yes": 0.37452091117510417}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6056974348630458, "res": {"Yes": 0.6056974348630458, "yes": 0.38618328137294433}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9710542344773352, "res": {"Yes": 0.9710542344773352, "yes": 0.025371996833840654}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7519530052527704, "res": {"Yes": 0.7519530052527704, "yes": 0.23781150854418753}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7848606444126649, "res": {"Yes": 0.7848606444126649, "yes": 0.20347710226756383}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7791009128294072, "res": {"Yes": 0.7791009128294072, "yes": 0.2091864331486861}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7822716423667102, "res": {"Yes": 0.7822716423667102, "yes": 0.2111475712826693}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7972985963336064, "res": {"Yes": 0.7972985963336064, "yes": 0.19301821814470205}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7107674906717764, "res": {"Yes": 0.7107674906717764, "yes": 0.2820053321188244}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9353430580331805, "res": {"Yes": 0.9353430580331805, "yes": 0.05483132632995526}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9620663190222745, "res": {"Yes": 0.9620663190222745, "yes": 0.0315277017178227}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9574737341506893, "res": {"Yes": 0.9574737341506893, "yes": 0.03603721948582662}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9468011753036268, "res": {"Yes": 0.9468011753036268, "yes": 0.04393820781295816}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9394364450956005, "res": {"Yes": 0.9394364450956005, "yes": 0.05450244586843674}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7731717799891987, "res": {"Yes": 0.7731717799891987, "yes": 0.21163603410381374}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8406978569246643, "res": {"Yes": 0.8406978569246643, "yes": 0.14662555307178904}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8478261211743158, "res": {"Yes": 0.8478261211743158, "yes": 0.13916046322243972}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8499446748736983, "res": {"Yes": 0.8499446748736983, "yes": 0.13719634554698712}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8547049220692204, "res": {"Yes": 0.8547049220692204, "yes": 0.13181068373421903}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.30301380907691006, "res": {"yes": 0.5601972080576207, "Yes": 0.30301380907691006}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4454474216606694, "res": {"yes": 0.4530057773719345, "Yes": 0.4454474216606694}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3253179393267031, "res": {"yes": 0.44484732747883965, "Yes": 0.3253179393267031}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9065467755073945, "res": {"Yes": 0.9065467755073945, "yes": 0.09178012574010411}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.44202125934301223, "res": {"yes": 0.47993996299343655, "Yes": 0.44202125934301223}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8291871710655312, "res": {"Yes": 0.8291871710655312, "yes": 0.1660331851604393}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9784951927841756, "res": {"Yes": 0.9784951927841756, "yes": 0.019921381038238588}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6877074152727477, "res": {"Yes": 0.6877074152727477, "yes": 0.3095551227905686}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8867165926142739, "res": {"Yes": 0.8867165926142739, "yes": 0.11106987180342522}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8815262753892182, "res": {"Yes": 0.8815262753892182, "yes": 0.11465433037538625}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9437185249702132, "res": {"Yes": 0.9437185249702132, "yes": 0.0451216005156565}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8373607899955302, "res": {"Yes": 0.8373607899955302, "yes": 0.14931135834547735}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9019180289902645, "res": {"Yes": 0.9019180289902645, "yes": 0.08790121076851813}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9187328879399758, "res": {"Yes": 0.9187328879399758, "yes": 0.07275613838964974}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923473279462863, "res": {"Yes": 0.9923473279462863, "yes": 0.004319558849983057}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.785884393053324, "res": {"Yes": 0.785884393053324, "yes": 0.2076462849474343}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8045323355560529, "res": {"Yes": 0.8045323355560529, "yes": 0.19036633199844488}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7997544707369657, "res": {"Yes": 0.7997544707369657, "yes": 0.19691226594010577}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8790127379423224, "res": {"Yes": 0.8790127379423224, "yes": 0.11566614474553603}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6717670838183134, "res": {"Yes": 0.6717670838183134, "yes": 0.32157219620538}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9816412188240585, "res": {"Yes": 0.9816412188240585, "yes": 0.014401181891062273}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9261872369921844, "res": {"Yes": 0.9261872369921844, "yes": 0.07115028135940882}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9646917709565882, "res": {"Yes": 0.9646917709565882, "yes": 0.03129022282631253}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216689327655821, "res": {"Yes": 0.9216689327655821, "yes": 0.07604211100326154}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8027598466019346, "res": {"Yes": 0.8027598466019346, "yes": 0.19541456553641925}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9141698657993018, "res": {"Yes": 0.9141698657993018, "yes": 0.08132574873203949}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8814055850742394, "res": {"Yes": 0.8814055850742394, "yes": 0.11597957279238184}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9204541126622289, "res": {"Yes": 0.9204541126622289, "yes": 0.07611931009363129}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7746065158862959, "res": {"Yes": 0.7746065158862959, "yes": 0.21543899400070005}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8798311543289873, "res": {"Yes": 0.8798311543289873, "yes": 0.1181506350992173}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9609431729566325, "res": {"Yes": 0.9609431729566325, "yes": 0.03105133737101274}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5391095941917369, "res": {"Yes": 0.5391095941917369, "yes": 0.4551371394773306}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6770982303950552, "res": {"Yes": 0.6770982303950552, "yes": 0.3143035626668304}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9627770828148455, "res": {"Yes": 0.9627770828148455, "yes": 0.029546936861865034}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.740527476965374, "res": {"Yes": 0.740527476965374, "yes": 0.2534319332234259}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7024366919891732, "res": {"Yes": 0.7024366919891732, "yes": 0.28596184104028005}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8466561829981291, "res": {"Yes": 0.8466561829981291, "yes": 0.14937444078531584}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.815480368640426, "res": {"Yes": 0.815480368640426, "yes": 0.1716054384210686}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.717000973960926, "res": {"Yes": 0.717000973960926, "yes": 0.25081642046954156}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7472978687377346, "res": {"Yes": 0.7472978687377346, "yes": 0.25189800889544717}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9785184695046738, "res": {"Yes": 0.9785184695046738, "yes": 0.012932134740748204}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9574478386662616, "res": {"Yes": 0.9574478386662616, "yes": 0.031144668627559755}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9696514838952072, "res": {"Yes": 0.9696514838952072, "yes": 0.025254750559165727}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9767837024092968, "res": {"Yes": 0.9767837024092968, "yes": 0.015180038056530138}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9101333970952286, "res": {"Yes": 0.9101333970952286, "yes": 0.08267720084724142}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9014698825949906, "res": {"Yes": 0.9014698825949906, "yes": 0.09335696500945069}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8420604112379887, "res": {"Yes": 0.8420604112379887, "yes": 0.1539089631103259}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7831495617943871, "res": {"Yes": 0.7831495617943871, "yes": 0.2125339157124967}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7987581019942914, "res": {"Yes": 0.7987581019942914, "yes": 0.19745495432936444}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6491308011376815, "res": {"Yes": 0.6491308011376815, "yes": 0.34497342289675537}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.892582802251324, "res": {"Yes": 0.892582802251324, "yes": 0.10168443864250076}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8787547416223619, "res": {"Yes": 0.8787547416223619, "yes": 0.1122769827949439}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8921920561434942, "res": {"Yes": 0.8921920561434942, "yes": 0.10055058950259557}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8876971424165098, "res": {"Yes": 0.8876971424165098, "yes": 0.10290344063654598}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9315856301546976, "res": {"Yes": 0.9315856301546976, "yes": 0.05790189303689024}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6938189302072221, "res": {"Yes": 0.6938189302072221, "yes": 0.2928325895443742}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8834520247985831, "res": {"Yes": 0.8834520247985831, "yes": 0.10680267563582081}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6881888964084036, "res": {"Yes": 0.6881888964084036, "yes": 0.3020807237920995}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8576207015694712, "res": {"Yes": 0.8576207015694712, "yes": 0.13347514476651454}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6788639565770653, "res": {"Yes": 0.6788639565770653, "yes": 0.29632516416900645}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8619734062607316, "res": {"Yes": 0.8619734062607316, "yes": 0.1316426532186948}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8535473080401655, "res": {"Yes": 0.8535473080401655, "yes": 0.13640498327035433}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8915180857577261, "res": {"Yes": 0.8915180857577261, "yes": 0.10119458818413922}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.908233713298788, "res": {"Yes": 0.908233713298788, "yes": 0.08651829366049893}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.909185026511908, "res": {"Yes": 0.909185026511908, "yes": 0.08435769897500732}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8090271520518642, "res": {"Yes": 0.8090271520518642, "yes": 0.18748326329171258}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8971258910536117, "res": {"Yes": 0.8971258910536117, "yes": 0.09433731145453929}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5780120862854046, "res": {"Yes": 0.5780120862854046, "yes": 0.4167284784818455}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8266764862777286, "res": {"Yes": 0.8266764862777286, "yes": 0.16667621181132808}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7481313461243345, "res": {"Yes": 0.7481313461243345, "yes": 0.24583876125467266}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7701598948827371, "res": {"Yes": 0.7701598948827371, "yes": 0.22218669101335847}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6566314807290045, "res": {"Yes": 0.6566314807290045, "yes": 0.3362741597801285}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6520573244400367, "res": {"Yes": 0.6520573244400367, "yes": 0.34070335738173646}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9538163226316261, "res": {"Yes": 0.9538163226316261, "yes": 0.03321059923314568}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7341638253296338, "res": {"Yes": 0.7341638253296338, "yes": 0.2542778276733531}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5799638238844161, "res": {"Yes": 0.5799638238844161, "yes": 0.4052292064939429}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8072757834469951, "res": {"Yes": 0.8072757834469951, "yes": 0.18134169676356302}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7980937876937267, "res": {"Yes": 0.7980937876937267, "yes": 0.19249913861337964}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5683261030043703, "res": {"Yes": 0.5683261030043703, "yes": 0.41892272637256905}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.678028762076355, "res": {"Yes": 0.678028762076355, "yes": 0.3019104888733175}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6565743759723617, "res": {"Yes": 0.6565743759723617, "yes": 0.33582959681516306}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.86450936592166, "res": {"Yes": 0.86450936592166, "yes": 0.12855557215525157}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6023388851001984, "res": {"Yes": 0.6023388851001984, "yes": 0.39044812930912104}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8106048674957093, "res": {"Yes": 0.8106048674957093, "yes": 0.18425824498738494}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5908002885024286, "res": {"Yes": 0.5908002885024286, "yes": 0.399420884373365}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8606338898711782, "res": {"Yes": 0.8606338898711782, "yes": 0.12913133393574736}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8408485234799453, "res": {"Yes": 0.8408485234799453, "yes": 0.15354856156497176}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8344962413205311, "res": {"Yes": 0.8344962413205311, "yes": 0.15577400624463447}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9392036778282811, "res": {"Yes": 0.9392036778282811, "yes": 0.05204868751964422}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8864674336624615, "res": {"Yes": 0.8864674336624615, "yes": 0.11103953165910098}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9008673027204329, "res": {"Yes": 0.9008673027204329, "yes": 0.08976833345458907}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.83150645275536, "res": {"Yes": 0.83150645275536, "yes": 0.15454517972311166}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7938510180804256, "res": {"Yes": 0.7938510180804256, "yes": 0.18767921063402607}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.793041282550921, "res": {"Yes": 0.793041282550921, "yes": 0.19571503123355322}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8345434000355851, "res": {"Yes": 0.8345434000355851, "yes": 0.1553519641246939}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8383409832574912, "res": {"Yes": 0.8383409832574912, "yes": 0.15440442229417164}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6711502416465889, "res": {"Yes": 0.6711502416465889, "yes": 0.3191968442149495}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8405507728886603, "res": {"Yes": 0.8405507728886603, "yes": 0.15514276833035637}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7916627646949743, "res": {"Yes": 0.7916627646949743, "yes": 0.2044257806870933}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8092339820062818, "res": {"Yes": 0.8092339820062818, "yes": 0.18312379123446812}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5109341152834983, "res": {"Yes": 0.5109341152834983, "yes": 0.48484006757691944}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8711671540821749, "res": {"Yes": 0.8711671540821749, "yes": 0.12555956644312127}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7835111235742408, "res": {"Yes": 0.7835111235742408, "yes": 0.21320423988456194}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7046629116630765, "res": {"Yes": 0.7046629116630765, "yes": 0.289252664846016}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7251827405189528, "res": {"Yes": 0.7251827405189528, "yes": 0.2698617198177269}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8724074892848744, "res": {"Yes": 0.8724074892848744, "yes": 0.10696805933193272}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8463654911795293, "res": {"Yes": 0.8463654911795293, "yes": 0.14311637049157583}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8553360468609678, "res": {"Yes": 0.8553360468609678, "yes": 0.13155273251865626}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8790268374197164, "res": {"Yes": 0.8790268374197164, "yes": 0.11170265546527382}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8897779515721265, "res": {"Yes": 0.8897779515721265, "yes": 0.1001924642745868}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8233380905903147, "res": {"Yes": 0.8233380905903147, "yes": 0.1646148460651457}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9438645266875932, "res": {"Yes": 0.9438645266875932, "yes": 0.044990496518186054}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5924902273695094, "res": {"Yes": 0.5924902273695094, "yes": 0.39500441378666096}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9561649012101194, "res": {"Yes": 0.9561649012101194, "yes": 0.03499768018912761}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9137757387977109, "res": {"Yes": 0.9137757387977109, "yes": 0.08360344095414025}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8191475931080866, "res": {"Yes": 0.8191475931080866, "yes": 0.16752550008346068}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7528206200096131, "res": {"Yes": 0.7528206200096131, "yes": 0.23475196272801635}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7239777638372844, "res": {"Yes": 0.7239777638372844, "yes": 0.26335490706015535}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7404334211388619, "res": {"Yes": 0.7404334211388619, "yes": 0.2490750804639949}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8291013795074438, "res": {"Yes": 0.8291013795074438, "yes": 0.13355227563475233}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8870852950693379, "res": {"Yes": 0.8870852950693379, "yes": 0.10523366714307783}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8206543082631192, "res": {"Yes": 0.8206543082631192, "yes": 0.16847141289373524}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7980215794345671, "res": {"Yes": 0.7980215794345671, "yes": 0.19037246189302603}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9306564012540185, "res": {"Yes": 0.9306564012540185, "yes": 0.06314712233179089}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8658459786417251, "res": {"Yes": 0.8658459786417251, "yes": 0.1252895274978056}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8118064900238269, "res": {"Yes": 0.8118064900238269, "yes": 0.18297142241534498}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9514889933456664, "res": {"Yes": 0.9514889933456664, "yes": 0.03855958391053136}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7336797892368393, "res": {"Yes": 0.7336797892368393, "yes": 0.2599986084750422}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6694393553200096, "res": {"Yes": 0.6694393553200096, "yes": 0.3274637883315963}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8339627015379704, "res": {"Yes": 0.8339627015379704, "yes": 0.15562201385929206}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.865794410390955, "res": {"Yes": 0.865794410390955, "yes": 0.12621434077877594}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8854104333353704, "res": {"Yes": 0.8854104333353704, "yes": 0.10872817704510952}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9142244579970056, "res": {"Yes": 0.9142244579970056, "yes": 0.08056772048078979}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9269100514511138, "res": {"Yes": 0.9269100514511138, "yes": 0.06684517864664635}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.953940953552666, "res": {"Yes": 0.953940953552666, "yes": 0.041661818213276296}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8779140000268014, "res": {"Yes": 0.8779140000268014, "yes": 0.11840463622418487}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.703844491486046, "res": {"Yes": 0.703844491486046, "yes": 0.28676595033100766}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8184177793968413, "res": {"Yes": 0.8184177793968413, "yes": 0.1641227755237748}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9262560691543008, "res": {"Yes": 0.9262560691543008, "yes": 0.06478527105461684}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8918339842817287, "res": {"Yes": 0.8918339842817287, "yes": 0.10266096725167558}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5389866588765323, "res": {"Yes": 0.5389866588765323, "yes": 0.45249272197710627}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6845920811995903, "res": {"Yes": 0.6845920811995903, "yes": 0.3013013958796975}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7373069557110802, "res": {"Yes": 0.7373069557110802, "yes": 0.2467199725171631}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8051827811564376, "res": {"Yes": 0.8051827811564376, "yes": 0.18594238710456856}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8219045910045947, "res": {"Yes": 0.8219045910045947, "yes": 0.16386251924699696}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.890375089646182, "res": {"Yes": 0.890375089646182, "yes": 0.10376339705154458}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8783494304101879, "res": {"Yes": 0.8783494304101879, "yes": 0.1123687056244285}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.875374766280557, "res": {"Yes": 0.875374766280557, "yes": 0.1142993265671147}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8900145304864108, "res": {"Yes": 0.8900145304864108, "yes": 0.10503412307006874}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8913425461919019, "res": {"Yes": 0.8913425461919019, "yes": 0.10075985163078024}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.84099274977798, "res": {"Yes": 0.84099274977798, "yes": 0.15383508915199928}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.773769912699578, "res": {"Yes": 0.773769912699578, "yes": 0.22072029776761692}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6815311915005207, "res": {"Yes": 0.6815311915005207, "yes": 0.3092223752230859}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7451999331218082, "res": {"Yes": 0.7451999331218082, "yes": 0.24735028230279865}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8343829411130098, "res": {"Yes": 0.8343829411130098, "yes": 0.15885343409877078}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8799681195061932, "res": {"Yes": 0.8799681195061932, "yes": 0.11584763993985919}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9141368941151972, "res": {"Yes": 0.9141368941151972, "yes": 0.08016463672040637}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9305706024116643, "res": {"Yes": 0.9305706024116643, "yes": 0.06234392783317583}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9404641565737417, "res": {"Yes": 0.9404641565737417, "yes": 0.053974411815028676}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.950259268890873, "res": {"Yes": 0.950259268890873, "yes": 0.04518735445415064}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6726740436653326, "res": {"Yes": 0.6726740436653326, "yes": 0.31715533796556383}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8946468227852344, "res": {"Yes": 0.8946468227852344, "yes": 0.09224154069955494}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7462839284867226, "res": {"Yes": 0.7462839284867226, "yes": 0.2403227421645027}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7959976065412506, "res": {"Yes": 0.7959976065412506, "yes": 0.1897158789417503}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8780240885651215, "res": {"Yes": 0.8780240885651215, "yes": 0.10339344639596343}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9288645896959578, "res": {"Yes": 0.9288645896959578, "yes": 0.06676265577820778}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9389639841943874, "res": {"Yes": 0.9389639841943874, "yes": 0.054414337655624305}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9543814697388727, "res": {"Yes": 0.9543814697388727, "yes": 0.04274732323267862}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9673130681116574, "res": {"Yes": 0.9673130681116574, "yes": 0.03035093362517549}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9008873202142929, "res": {"Yes": 0.9008873202142929, "yes": 0.09453448409387376}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8953806928548975, "res": {"Yes": 0.8953806928548975, "yes": 0.09574291963751867}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9352804819345448, "res": {"Yes": 0.9352804819345448, "yes": 0.05460739313471777}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9016084176082184, "res": {"Yes": 0.9016084176082184, "yes": 0.08770209426627579}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9316141855550709, "res": {"Yes": 0.9316141855550709, "yes": 0.05855509342387649}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9236861957609559, "res": {"Yes": 0.9236861957609559, "yes": 0.06644872376178643}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8483866431429451, "res": {"Yes": 0.8483866431429451, "yes": 0.13867558424466903}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7524196616390724, "res": {"Yes": 0.7524196616390724, "yes": 0.2340291497035766}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8065211236543365, "res": {"Yes": 0.8065211236543365, "yes": 0.18317879156686243}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8638634902990832, "res": {"Yes": 0.8638634902990832, "yes": 0.12534358906020873}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9580988170589092, "res": {"Yes": 0.9580988170589092, "yes": 0.03827592387160426}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8932551559241056, "res": {"Yes": 0.8932551559241056, "yes": 0.10060166205816937}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.885568586188841, "res": {"Yes": 0.885568586188841, "yes": 0.1102339779425476}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8187391287364273, "res": {"Yes": 0.8187391287364273, "yes": 0.170942420740235}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9001480042602992, "res": {"Yes": 0.9001480042602992, "yes": 0.09241086690987124}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.862989728547712, "res": {"Yes": 0.862989728547712, "yes": 0.1242212908824361}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9080854526145612, "res": {"Yes": 0.9080854526145612, "yes": 0.08896247743526502}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9795777348307148, "res": {"Yes": 0.9795777348307148, "yes": 0.017017914426134245}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7392181539804444, "res": {"Yes": 0.7392181539804444, "yes": 0.2559307701952179}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7212387494242739, "res": {"Yes": 0.7212387494242739, "yes": 0.2738100941318531}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7438332021069541, "res": {"Yes": 0.7438332021069541, "yes": 0.2494170210674096}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9339564586385176, "res": {"Yes": 0.9339564586385176, "yes": 0.054624083704248864}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9347571930456889, "res": {"Yes": 0.9347571930456889, "yes": 0.058003629082803466}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9289235187347373, "res": {"Yes": 0.9289235187347373, "yes": 0.05617110501170009}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9414345095465152, "res": {"Yes": 0.9414345095465152, "yes": 0.05303021147521087}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9489843011760768, "res": {"Yes": 0.9489843011760768, "yes": 0.043331865103309476}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8629615955411318, "res": {"Yes": 0.8629615955411318, "yes": 0.12525549095725785}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8739226660690114, "res": {"Yes": 0.8739226660690114, "yes": 0.11402214443748505}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9366018003648732, "res": {"Yes": 0.9366018003648732, "yes": 0.05151029440133449}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8905929697155411, "res": {"Yes": 0.8905929697155411, "yes": 0.10145910176272709}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8856690950709563, "res": {"Yes": 0.8856690950709563, "yes": 0.10068121911280932}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7388197488733791, "res": {"Yes": 0.7388197488733791, "yes": 0.24636827738256087}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8664012158159496, "res": {"Yes": 0.8664012158159496, "yes": 0.12300589474524311}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7786077890021414, "res": {"Yes": 0.7786077890021414, "yes": 0.19136604864454732}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.736824730400462, "res": {"Yes": 0.736824730400462, "yes": 0.25857161054826655}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8340963047237911, "res": {"Yes": 0.8340963047237911, "yes": 0.15599973039051857}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9620415961596737, "res": {"Yes": 0.9620415961596737, "yes": 0.027532820863564542}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9543357530983188, "res": {"Yes": 0.9543357530983188, "yes": 0.038257012583391134}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9591934633321371, "res": {"Yes": 0.9591934633321371, "yes": 0.0310064323766607}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6358001338393695, "res": {"Yes": 0.6358001338393695, "yes": 0.3366798500558117}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7588278643466648, "res": {"Yes": 0.7588278643466648, "yes": 0.22319159633782212}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.913692525030375, "res": {"Yes": 0.913692525030375, "yes": 0.08329326378550116}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8887096942037239, "res": {"Yes": 0.8887096942037239, "yes": 0.1066234297633449}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7938956337613305, "res": {"Yes": 0.7938956337613305, "yes": 0.19941952649402292}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9581102999420427, "res": {"Yes": 0.9581102999420427, "yes": 0.03396782927798239}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8850677705618711, "res": {"Yes": 0.8850677705618711, "yes": 0.10724881180249456}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9256896052887966, "res": {"Yes": 0.9256896052887966, "yes": 0.06901143247956493}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9638958689187281, "res": {"Yes": 0.9638958689187281, "yes": 0.033667308050011995}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.944502051494549, "res": {"Yes": 0.944502051494549, "yes": 0.051006153330329054}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9459326376724912, "res": {"Yes": 0.9459326376724912, "yes": 0.049455623818935264}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9373057899352778, "res": {"Yes": 0.9373057899352778, "yes": 0.0574293719550378}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5868164087495861, "res": {"Yes": 0.5868164087495861, "yes": 0.39996875796980613}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7739575901356518, "res": {"Yes": 0.7739575901356518, "yes": 0.21438290223579332}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.954717093080956, "res": {"Yes": 0.954717093080956, "yes": 0.024228341371616273}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8088998454663271, "res": {"Yes": 0.8088998454663271, "yes": 0.17481563057377014}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.510964021132452, "res": {"Yes": 0.510964021132452, "yes": 0.4696544581933282}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9392501545329919, "res": {"Yes": 0.9392501545329919, "yes": 0.054577361868611396}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.906570772118136, "res": {"Yes": 0.906570772118136, "yes": 0.08608271006217014}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8212706950900475, "res": {"Yes": 0.8212706950900475, "yes": 0.16717506871597632}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8836565852903733, "res": {"Yes": 0.8836565852903733, "yes": 0.11014039597029066}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8350580463171164, "res": {"Yes": 0.8350580463171164, "yes": 0.1579716330076972}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7969546202649308, "res": {"Yes": 0.7969546202649308, "yes": 0.1850780594358865}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9103678685544415, "res": {"Yes": 0.9103678685544415, "yes": 0.07873136218137704}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8190312168856204, "res": {"Yes": 0.8190312168856204, "yes": 0.1598330733369376}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.940076839415414, "res": {"Yes": 0.940076839415414, "yes": 0.051245093609255804}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8952037168641703, "res": {"Yes": 0.8952037168641703, "yes": 0.09633520024689937}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.933296941030673, "res": {"Yes": 0.933296941030673, "yes": 0.05976407207502695}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9543666979368034, "res": {"Yes": 0.9543666979368034, "yes": 0.0418993234065273}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9703112331804649, "res": {"Yes": 0.9703112331804649, "yes": 0.02684342411342022}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9597812596637179, "res": {"Yes": 0.9597812596637179, "yes": 0.037422097754253904}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.979488406312406, "res": {"Yes": 0.979488406312406, "yes": 0.018245547524130344}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.907242590418764, "res": {"Yes": 0.907242590418764, "yes": 0.0824545373464087}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8518643145996514, "res": {"Yes": 0.8518643145996514, "yes": 0.13849696042298643}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8298520226916425, "res": {"Yes": 0.8298520226916425, "yes": 0.16094826516749633}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9185632724335242, "res": {"Yes": 0.9185632724335242, "yes": 0.07145960171071593}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8290929392983611, "res": {"Yes": 0.8290929392983611, "yes": 0.16126991936336113}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9004346660218995, "res": {"Yes": 0.9004346660218995, "yes": 0.09449355952307009}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8416112244929492, "res": {"Yes": 0.8416112244929492, "yes": 0.15287201666963263}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8479112980648126, "res": {"Yes": 0.8479112980648126, "yes": 0.14744258989784326}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9044239565875213, "res": {"Yes": 0.9044239565875213, "yes": 0.09260143876793557}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9271251751574918, "res": {"Yes": 0.9271251751574918, "yes": 0.0689548525092629}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7838632890882237, "res": {"Yes": 0.7838632890882237, "yes": 0.21306180309160275}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7533076265611688, "res": {"Yes": 0.7533076265611688, "yes": 0.24298812625101}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43431636883284713, "res": {"yes": 0.5592705128898938, "Yes": 0.43431636883284713}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5835054424789813, "res": {"Yes": 0.5835054424789813, "yes": 0.4109719972577074}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5075770941236697, "res": {"Yes": 0.5075770941236697, "yes": 0.48972958870198163}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8423474848729433, "res": {"Yes": 0.8423474848729433, "yes": 0.14799831358187543}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8610720726377057, "res": {"Yes": 0.8610720726377057, "yes": 0.13008986475443052}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8598982935862828, "res": {"Yes": 0.8598982935862828, "yes": 0.12694573084883692}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8949615164604056, "res": {"Yes": 0.8949615164604056, "yes": 0.09737008023208273}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9005795756274699, "res": {"Yes": 0.9005795756274699, "yes": 0.0916580934827682}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9057208527156893, "res": {"Yes": 0.9057208527156893, "yes": 0.09104132997703328}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8602168933175578, "res": {"Yes": 0.8602168933175578, "yes": 0.13443525114215074}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7597571015571717, "res": {"Yes": 0.7597571015571717, "yes": 0.23250884771281793}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9639718683566938, "res": {"Yes": 0.9639718683566938, "yes": 0.02478837464574503}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8652932086066314, "res": {"Yes": 0.8652932086066314, "yes": 0.13028127656448854}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9539112225382822, "res": {"Yes": 0.9539112225382822, "yes": 0.04034122822873543}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.918533901842453, "res": {"Yes": 0.918533901842453, "yes": 0.0759550926136978}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8308682588230644, "res": {"Yes": 0.8308682588230644, "yes": 0.1573707173522218}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8316004099774701, "res": {"Yes": 0.8316004099774701, "yes": 0.1567157224401794}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8974409900761968, "res": {"Yes": 0.8974409900761968, "yes": 0.09532644639993418}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8055083018198752, "res": {"Yes": 0.8055083018198752, "yes": 0.19035947893384886}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8251539532969812, "res": {"Yes": 0.8251539532969812, "yes": 0.17212443576369718}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8243184543571482, "res": {"Yes": 0.8243184543571482, "yes": 0.17344543929044923}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6833860173013145, "res": {"Yes": 0.6833860173013145, "yes": 0.30757682055285623}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6478810009520114, "res": {"Yes": 0.6478810009520114, "yes": 0.34498632514404226}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7788749206464674, "res": {"Yes": 0.7788749206464674, "yes": 0.19433193050431558}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7970915408596039, "res": {"Yes": 0.7970915408596039, "yes": 0.1917306224815672}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6880236958496225, "res": {"Yes": 0.6880236958496225, "yes": 0.30000741138932707}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6919749316222857, "res": {"Yes": 0.6919749316222857, "yes": 0.30028807952565273}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7079824809609521, "res": {"Yes": 0.7079824809609521, "yes": 0.27548849822231475}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9425478073194699, "res": {"Yes": 0.9425478073194699, "yes": 0.045102436930391135}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8344630457203163, "res": {"Yes": 0.8344630457203163, "yes": 0.15790875701689322}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6275014921818385, "res": {"Yes": 0.6275014921818385, "yes": 0.23431990075675127}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9665986076824796, "res": {"Yes": 0.9665986076824796, "yes": 0.02459737374687505}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6169395956193456, "res": {"Yes": 0.6169395956193456, "yes": 0.3701190900985944}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.809373894470776, "res": {"Yes": 0.809373894470776, "yes": 0.17766262363839824}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8466229523951012, "res": {"Yes": 0.8466229523951012, "yes": 0.13684457311495854}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8308090864928227, "res": {"Yes": 0.8308090864928227, "yes": 0.16139580739026665}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9213245464005898, "res": {"Yes": 0.9213245464005898, "yes": 0.06606709912425336}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8329871192667291, "res": {"Yes": 0.8329871192667291, "yes": 0.1462715285416476}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9803778651004255, "res": {"Yes": 0.9803778651004255, "yes": 0.013738772965454914}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3063901160372744, "res": {"yes": 0.6763444779270766, "Yes": 0.3063901160372744}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3283964537601761, "res": {"yes": 0.6559037128521258, "Yes": 0.3283964537601761}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9498194206329312, "res": {"Yes": 0.9498194206329312, "yes": 0.031318014621090004}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7293141332357724, "res": {"Yes": 0.7293141332357724, "yes": 0.2618928836106781}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8231882896206565, "res": {"Yes": 0.8231882896206565, "yes": 0.16392011065447804}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.819926628636278, "res": {"Yes": 0.819926628636278, "yes": 0.16511912431959963}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.751858302735775, "res": {"Yes": 0.751858302735775, "yes": 0.2368310408245461}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7988616357412504, "res": {"Yes": 0.7988616357412504, "yes": 0.1926463219439668}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9324895816685349, "res": {"Yes": 0.9324895816685349, "yes": 0.0636623145368553}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8743662856395417, "res": {"Yes": 0.8743662856395417, "yes": 0.12188135590604307}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9284402280553203, "res": {"Yes": 0.9284402280553203, "yes": 0.06573754865015102}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7594605274914843, "res": {"Yes": 0.7594605274914843, "yes": 0.2352364736354247}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9564701510676487, "res": {"Yes": 0.9564701510676487, "yes": 0.04016496021081795}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6590936535062099, "res": {"Yes": 0.6590936535062099, "yes": 0.33453391982363023}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7663012575037598, "res": {"Yes": 0.7663012575037598, "yes": 0.2207711575830483}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9813111886203809, "res": {"Yes": 0.9813111886203809, "yes": 0.010427445066960766}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4251878830417514, "res": {"yes": 0.5657966731706612, "Yes": 0.4251878830417514}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4373793861472056, "res": {"yes": 0.5539242856924578, "Yes": 0.4373793861472056}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.48588249215782586, "res": {"yes": 0.502470106686275, "Yes": 0.48588249215782586}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8705609630527636, "res": {"Yes": 0.8705609630527636, "yes": 0.11884997446816252}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7395755784186051, "res": {"Yes": 0.7395755784186051, "yes": 0.24519212305692817}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8478993849947642, "res": {"Yes": 0.8478993849947642, "yes": 0.13906834166850668}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8438759026261468, "res": {"Yes": 0.8438759026261468, "yes": 0.13874728418341783}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7208608688594457, "res": {"Yes": 0.7208608688594457, "yes": 0.24969654959343288}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.969882477540087, "res": {"Yes": 0.969882477540087, "yes": 0.021216803358597262}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8272996217469442, "res": {"Yes": 0.8272996217469442, "yes": 0.15649274936676774}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7818612373704786, "res": {"Yes": 0.7818612373704786, "yes": 0.21232902557494424}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8247544032476289, "res": {"Yes": 0.8247544032476289, "yes": 0.16852777600755348}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8013009056282591, "res": {"Yes": 0.8013009056282591, "yes": 0.19383846914957928}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8234120379047851, "res": {"Yes": 0.8234120379047851, "yes": 0.17028860583187674}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8406774618420486, "res": {"Yes": 0.8406774618420486, "yes": 0.145875800618961}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813178360448874, "res": {"Yes": 0.9813178360448874, "yes": 0.01704555181340324}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9824068607020442, "res": {"Yes": 0.9824068607020442, "yes": 0.016041303728742608}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9632816736023664, "res": {"Yes": 0.9632816736023664, "yes": 0.03350123416779572}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6134173271397595, "res": {"Yes": 0.6134173271397595, "yes": 0.3687068113316047}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8716020098349307, "res": {"Yes": 0.8716020098349307, "yes": 0.11664119098466989}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8700773354171703, "res": {"Yes": 0.8700773354171703, "yes": 0.12226970225164227}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.825564305340443, "res": {"Yes": 0.825564305340443, "yes": 0.16061051602587542}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8372217996409096, "res": {"Yes": 0.8372217996409096, "yes": 0.1519793230431486}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9135779962824254, "res": {"Yes": 0.9135779962824254, "yes": 0.07762997892846872}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.902073217336847, "res": {"Yes": 0.902073217336847, "yes": 0.07977879792429522}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8999110219907148, "res": {"Yes": 0.8999110219907148, "yes": 0.08401818630007651}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9359696577160672, "res": {"Yes": 0.9359696577160672, "yes": 0.054821983867948995}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8903427660556688, "res": {"Yes": 0.8903427660556688, "yes": 0.09183272163879722}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9502949014307311, "res": {"Yes": 0.9502949014307311, "yes": 0.04453464292417947}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9215311213057962, "res": {"Yes": 0.9215311213057962, "yes": 0.07136218298648231}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9389440182765059, "res": {"Yes": 0.9389440182765059, "yes": 0.0524088085334115}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9631718435977895, "res": {"Yes": 0.9631718435977895, "yes": 0.034329535570166894}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.853170985196197, "res": {"Yes": 0.853170985196197, "yes": 0.13792256148323268}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7812007655604489, "res": {"Yes": 0.7812007655604489, "yes": 0.2112474043332972}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5479363984458215, "res": {"Yes": 0.5479363984458215, "yes": 0.43981134736526256}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7629637184559722, "res": {"Yes": 0.7629637184559722, "yes": 0.22558952323449316}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6902931130995041, "res": {"Yes": 0.6902931130995041, "yes": 0.2955216142916314}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6603789777792454, "res": {"Yes": 0.6603789777792454, "yes": 0.32411771865363}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7309904526412021, "res": {"Yes": 0.7309904526412021, "yes": 0.24798391346040433}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8677638010142165, "res": {"Yes": 0.8677638010142165, "yes": 0.1187060511676403}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8080084278564748, "res": {"Yes": 0.8080084278564748, "yes": 0.17581406346040726}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9152492192754176, "res": {"Yes": 0.9152492192754176, "yes": 0.06935752711800694}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7567409425742277, "res": {"Yes": 0.7567409425742277, "yes": 0.2274862913975228}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9613250872872843, "res": {"Yes": 0.9613250872872843, "yes": 0.03213631756711591}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8700475531797048, "res": {"Yes": 0.8700475531797048, "yes": 0.12078233954362422}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8763127072486202, "res": {"Yes": 0.8763127072486202, "yes": 0.11415364212872545}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.889936719917549, "res": {"Yes": 0.889936719917549, "yes": 0.10238055264388897}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8244767634294975, "res": {"Yes": 0.8244767634294975, "yes": 0.16670438247147795}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9436121644493236, "res": {"Yes": 0.9436121644493236, "yes": 0.031203587712262146}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8220498179367027, "res": {"Yes": 0.8220498179367027, "yes": 0.17227764301833726}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5756601153314995, "res": {"Yes": 0.5756601153314995, "yes": 0.41646015009328957}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8595962299864339, "res": {"Yes": 0.8595962299864339, "yes": 0.1340605118490719}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9072141397372375, "res": {"Yes": 0.9072141397372375, "yes": 0.08595119017106453}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8718208704250545, "res": {"Yes": 0.8718208704250545, "yes": 0.11826977915689549}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8166270626513609, "res": {"Yes": 0.8166270626513609, "yes": 0.17094512165181983}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9023806150664924, "res": {"Yes": 0.9023806150664924, "yes": 0.09314554585369755}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7374249195135277, "res": {"Yes": 0.7374249195135277, "yes": 0.24880866289834966}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9873946020543465, "res": {"Yes": 0.9873946020543465, "yes": 0.00816390231044245}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7782638838395626, "res": {"Yes": 0.7782638838395626, "yes": 0.21093828003044832}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5409090980711774, "res": {"Yes": 0.5409090980711774, "yes": 0.4523220286764204}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6865402395352114, "res": {"Yes": 0.6865402395352114, "yes": 0.29460924320633447}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.647672604678575, "res": {"Yes": 0.647672604678575, "yes": 0.3438444857961372}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5566594646932137, "res": {"Yes": 0.5566594646932137, "yes": 0.4344759008115676}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7612018067561248, "res": {"Yes": 0.7612018067561248, "yes": 0.22845377196288838}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8652031622280563, "res": {"Yes": 0.8652031622280563, "yes": 0.12625330916023453}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8540640141343724, "res": {"Yes": 0.8540640141343724, "yes": 0.12534564471192558}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8928370228222388, "res": {"Yes": 0.8928370228222388, "yes": 0.09286290622828594}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8860921340621941, "res": {"Yes": 0.8860921340621941, "yes": 0.10102234994873623}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7684986904797951, "res": {"Yes": 0.7684986904797951, "yes": 0.22476092438092513}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5878997140301817, "res": {"Yes": 0.5878997140301817, "yes": 0.4061201204345445}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6115864011806798, "res": {"Yes": 0.6115864011806798, "yes": 0.3809421548205268}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5241099439348358, "res": {"Yes": 0.5241099439348358, "yes": 0.46734107713685985}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5545284866120097, "res": {"Yes": 0.5545284866120097, "yes": 0.4410625838993966}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9622417851319574, "res": {"Yes": 0.9622417851319574, "yes": 0.031197391295067083}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5488144679693749, "res": {"Yes": 0.5488144679693749, "yes": 0.44440831978656264}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7132648563594626, "res": {"Yes": 0.7132648563594626, "yes": 0.28169966311511796}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4203825537533272, "res": {"yes": 0.571546862490802, "Yes": 0.4203825537533272}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4678595195625398, "res": {"yes": 0.5274225231311227, "Yes": 0.4678595195625398}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9824566837469301, "res": {"Yes": 0.9824566837469301, "yes": 0.01467587925354678}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7643512837537271, "res": {"Yes": 0.7643512837537271, "yes": 0.2046469049057534}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7731813905741536, "res": {"Yes": 0.7731813905741536, "yes": 0.1851218170610847}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9882649521432406, "res": {"Yes": 0.9882649521432406, "yes": 0.0105723687814742}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8012215407041693, "res": {"Yes": 0.8012215407041693, "yes": 0.19614152774393306}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8488442250600803, "res": {"Yes": 0.8488442250600803, "yes": 0.14643560550845416}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8471102733053525, "res": {"Yes": 0.8471102733053525, "yes": 0.14994437304995822}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7621191047985623, "res": {"Yes": 0.7621191047985623, "yes": 0.23286383392552049}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8197674963219503, "res": {"Yes": 0.8197674963219503, "yes": 0.17496154011205828}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.885029014292773, "res": {"Yes": 0.885029014292773, "yes": 0.11233138294006119}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8897657705953484, "res": {"Yes": 0.8897657705953484, "yes": 0.09709659220760052}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8950178515860477, "res": {"Yes": 0.8950178515860477, "yes": 0.09677971136540023}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8877172489844916, "res": {"Yes": 0.8877172489844916, "yes": 0.10653962471279547}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9097269312976409, "res": {"Yes": 0.9097269312976409, "yes": 0.08539010939000738}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9297231456533563, "res": {"Yes": 0.9297231456533563, "yes": 0.06463616468447315}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.82686324532192, "res": {"Yes": 0.82686324532192, "yes": 0.16405097110915737}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7832999801111863, "res": {"Yes": 0.7832999801111863, "yes": 0.21108225232947758}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7623560615941423, "res": {"Yes": 0.7623560615941423, "yes": 0.22067677601628283}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8804958801905323, "res": {"Yes": 0.8804958801905323, "yes": 0.11119376153982014}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8023149920964056, "res": {"Yes": 0.8023149920964056, "yes": 0.18831546822227024}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6143682530203812, "res": {"Yes": 0.6143682530203812, "yes": 0.3722190328494939}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7475159396486241, "res": {"Yes": 0.7475159396486241, "yes": 0.23116276643138017}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.744687821473528, "res": {"Yes": 0.744687821473528, "yes": 0.24371253097429124}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8105993148713847, "res": {"Yes": 0.8105993148713847, "yes": 0.17601282804161486}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.695270660264238, "res": {"Yes": 0.695270660264238, "yes": 0.28308799309590554}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.882296104437123, "res": {"Yes": 0.882296104437123, "yes": 0.10089456684500228}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7934191853595319, "res": {"Yes": 0.7934191853595319, "yes": 0.1931195793142069}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7996384427782582, "res": {"Yes": 0.7996384427782582, "yes": 0.1849771638817491}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7765367575794502, "res": {"Yes": 0.7765367575794502, "yes": 0.20424833439747486}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7981464795849305, "res": {"Yes": 0.7981464795849305, "yes": 0.190687922660313}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9024180826874524, "res": {"Yes": 0.9024180826874524, "yes": 0.08979415454018473}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9055085041868589, "res": {"Yes": 0.9055085041868589, "yes": 0.08988007475008204}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8747921007487123, "res": {"Yes": 0.8747921007487123, "yes": 0.10880470510989453}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9656779359474077, "res": {"Yes": 0.9656779359474077, "yes": 0.025865832233385726}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9029718553419467, "res": {"Yes": 0.9029718553419467, "yes": 0.08906446906175478}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8612525980974077, "res": {"Yes": 0.8612525980974077, "yes": 0.13383462272513358}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9155821651131553, "res": {"Yes": 0.9155821651131553, "yes": 0.08029842276525256}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9146507742925564, "res": {"Yes": 0.9146507742925564, "yes": 0.07914272586793064}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8425549300572214, "res": {"Yes": 0.8425549300572214, "yes": 0.15265365750747725}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8820343836544392, "res": {"Yes": 0.8820343836544392, "yes": 0.11126828632306635}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9399718151975549, "res": {"Yes": 0.9399718151975549, "yes": 0.05775205186970934}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9173052987680494, "res": {"Yes": 0.9173052987680494, "yes": 0.07907005874106957}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9469481283555465, "res": {"Yes": 0.9469481283555465, "yes": 0.04796398318693982}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9484383775841421, "res": {"Yes": 0.9484383775841421, "yes": 0.048116592641219286}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9634477326292901, "res": {"Yes": 0.9634477326292901, "yes": 0.03442049180331424}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.935254419771761, "res": {"Yes": 0.935254419771761, "yes": 0.05134959138952444}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.956968685107781, "res": {"Yes": 0.956968685107781, "yes": 0.04054992289103331}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9164373517615167, "res": {"Yes": 0.9164373517615167, "yes": 0.07033577634601176}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9025825543450435, "res": {"Yes": 0.9025825543450435, "yes": 0.08148901932672012}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9545254690879835, "res": {"Yes": 0.9545254690879835, "yes": 0.025708019318877645}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8843275882915551, "res": {"Yes": 0.8843275882915551, "yes": 0.10931382555363695}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6656348447226955, "res": {"Yes": 0.6656348447226955, "yes": 0.3297278373849211}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7312750460602936, "res": {"Yes": 0.7312750460602936, "yes": 0.26435668716166405}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.546767014810505, "res": {"Yes": 0.546767014810505, "yes": 0.4493196900631108}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5978769592286298, "res": {"Yes": 0.5978769592286298, "yes": 0.3941987717785919}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7915513064245516, "res": {"Yes": 0.7915513064245516, "yes": 0.20452979853642567}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8568963610165499, "res": {"Yes": 0.8568963610165499, "yes": 0.13828419878780798}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6843593936748893, "res": {"Yes": 0.6843593936748893, "yes": 0.3100688219042253}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.871123535834762, "res": {"Yes": 0.871123535834762, "yes": 0.12403872050297537}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7429490053984787, "res": {"Yes": 0.7429490053984787, "yes": 0.2537461299933946}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8088037538726394, "res": {"Yes": 0.8088037538726394, "yes": 0.1811494496021125}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.877732211640464, "res": {"Yes": 0.877732211640464, "yes": 0.10683833231971751}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.780264799790691, "res": {"Yes": 0.780264799790691, "yes": 0.2087376872968166}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8555214097113748, "res": {"Yes": 0.8555214097113748, "yes": 0.12173732855656882}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8955738884977024, "res": {"Yes": 0.8955738884977024, "yes": 0.09003442336014437}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9817273832184725, "res": {"Yes": 0.9817273832184725, "yes": 0.01580698101717769}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.680841858043739, "res": {"Yes": 0.680841858043739, "yes": 0.3127993651139192}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8717799917028495, "res": {"Yes": 0.8717799917028495, "yes": 0.11916812116622176}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9799370539927862, "res": {"Yes": 0.9799370539927862, "yes": 0.018307248585360664}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9668736789158936, "res": {"Yes": 0.9668736789158936, "yes": 0.02716391882677109}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6915877070389008, "res": {"Yes": 0.6915877070389008, "yes": 0.28076709408621087}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8498706224203186, "res": {"Yes": 0.8498706224203186, "yes": 0.14163741533981467}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8255453836234083, "res": {"Yes": 0.8255453836234083, "yes": 0.16419315004134188}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8151424220718894, "res": {"Yes": 0.8151424220718894, "yes": 0.17424540497334426}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8305813181767215, "res": {"Yes": 0.8305813181767215, "yes": 0.16041149845349562}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.765223271169406, "res": {"Yes": 0.765223271169406, "yes": 0.22375642660112122}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7072675179510741, "res": {"Yes": 0.7072675179510741, "yes": 0.28593530501272224}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.605269509436916, "res": {"Yes": 0.605269509436916, "yes": 0.38241497806191727}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8664709379270179, "res": {"Yes": 0.8664709379270179, "yes": 0.12997713871883462}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5466542284096668, "res": {"Yes": 0.5466542284096668, "yes": 0.44190276932767864}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8502416209081687, "res": {"Yes": 0.8502416209081687, "yes": 0.1379863240255386}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8992400870880967, "res": {"Yes": 0.8992400870880967, "yes": 0.09452329187354953}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8145803958831697, "res": {"Yes": 0.8145803958831697, "yes": 0.1758078749142925}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.876541787801274, "res": {"Yes": 0.876541787801274, "yes": 0.11225492253531619}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8858495862511783, "res": {"Yes": 0.8858495862511783, "yes": 0.10913226412800621}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6597812752655189, "res": {"Yes": 0.6597812752655189, "yes": 0.2918038643018964}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.48266806384610195, "res": {"yes": 0.4913506948547394, "Yes": 0.48266806384610195}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7891039719094533, "res": {"Yes": 0.7891039719094533, "yes": 0.1792038310576331}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3242125693740917, "res": {"yes": 0.6136253844999512, "Yes": 0.3242125693740917}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.47465317478039504, "res": {"yes": 0.5173483501378754, "Yes": 0.47465317478039504}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.969482939179414, "res": {"Yes": 0.969482939179414, "yes": 0.02281317634680042}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8136569981324567, "res": {"Yes": 0.8136569981324567, "yes": 0.18004618097965286}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.860351080413557, "res": {"Yes": 0.860351080413557, "yes": 0.1341055771599056}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7651095674397066, "res": {"Yes": 0.7651095674397066, "yes": 0.18771823319817185}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8591250597438705, "res": {"Yes": 0.8591250597438705, "yes": 0.13707721483352756}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9317585354493603, "res": {"Yes": 0.9317585354493603, "yes": 0.05657389608896307}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4975462807883273, "res": {"Yes": 0.4975462807883273, "yes": 0.482001597799649}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2689739080987859, "res": {"yes": 0.7158676324443498, "Yes": 0.2689739080987859}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5408812636051776, "res": {"Yes": 0.5408812636051776, "yes": 0.44627367235550364}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7535618806507078, "res": {"Yes": 0.7535618806507078, "yes": 0.2385811201663484}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7860490059182625, "res": {"Yes": 0.7860490059182625, "yes": 0.2034952328850829}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5893357050822056, "res": {"Yes": 0.5893357050822056, "yes": 0.3979974144800825}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5329111588719837, "res": {"Yes": 0.5329111588719837, "yes": 0.4598154112153398}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5575688586508124, "res": {"Yes": 0.5575688586508124, "yes": 0.4346294717292972}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7401971717234697, "res": {"Yes": 0.7401971717234697, "yes": 0.2536207849995956}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8299540011736204, "res": {"Yes": 0.8299540011736204, "yes": 0.15769965069695974}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9807095732320188, "res": {"Yes": 0.9807095732320188, "yes": 0.015893618540060835}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7869086271791714, "res": {"Yes": 0.7869086271791714, "yes": 0.20275952663259353}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8935118741423015, "res": {"Yes": 0.8935118741423015, "yes": 0.10207696412323254}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8843937738448954, "res": {"Yes": 0.8843937738448954, "yes": 0.11085724349931479}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.871147343966333, "res": {"Yes": 0.871147343966333, "yes": 0.12499600527381217}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8646522984245093, "res": {"Yes": 0.8646522984245093, "yes": 0.13101680258480042}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9191856076905003, "res": {"Yes": 0.9191856076905003, "yes": 0.07824568459721595}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9576923474852094, "res": {"Yes": 0.9576923474852094, "yes": 0.039207155914752746}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9629437404716391, "res": {"Yes": 0.9629437404716391, "yes": 0.035089676801078724}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8593575790358917, "res": {"Yes": 0.8593575790358917, "yes": 0.13771438671768507}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8969432458457607, "res": {"Yes": 0.8969432458457607, "yes": 0.0987303383837121}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8371585499232348, "res": {"Yes": 0.8371585499232348, "yes": 0.15860039170781207}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8429850545688617, "res": {"Yes": 0.8429850545688617, "yes": 0.1537880228162354}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8787981004509671, "res": {"Yes": 0.8787981004509671, "yes": 0.11474216628966673}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8516498421684474, "res": {"Yes": 0.8516498421684474, "yes": 0.13919464522958583}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8772180551007763, "res": {"Yes": 0.8772180551007763, "yes": 0.1126990823931686}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7514931837674269, "res": {"Yes": 0.7514931837674269, "yes": 0.24192846395899434}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6879439722830201, "res": {"Yes": 0.6879439722830201, "yes": 0.280592258721432}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9226128822231116, "res": {"Yes": 0.9226128822231116, "yes": 0.07196766198186792}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8960497213219875, "res": {"Yes": 0.8960497213219875, "yes": 0.09515444220427585}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7470862788876863, "res": {"Yes": 0.7470862788876863, "yes": 0.2369472586390735}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7523249906293998, "res": {"Yes": 0.7523249906293998, "yes": 0.23923993942582267}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8843700989404595, "res": {"Yes": 0.8843700989404595, "yes": 0.10643136092157407}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.858724440069282, "res": {"Yes": 0.858724440069282, "yes": 0.13371767562297343}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8087766189618877, "res": {"Yes": 0.8087766189618877, "yes": 0.16946446841608667}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9471185583696248, "res": {"Yes": 0.9471185583696248, "yes": 0.04653116387528926}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8208625264649037, "res": {"Yes": 0.8208625264649037, "yes": 0.1660812587258561}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785568281805035, "res": {"Yes": 0.9785568281805035, " Yes": 0.01250288326590129}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6947055863019703, "res": {"Yes": 0.6947055863019703, "yes": 0.2621970391632128}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6605220709892987, "res": {"Yes": 0.6605220709892987, "yes": 0.32869796151146335}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.49819144406342053, "res": {"Yes": 0.49819144406342053, "yes": 0.4877864506652942}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.584170054107982, "res": {"Yes": 0.584170054107982, "yes": 0.39647772046119123}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5914638164276921, "res": {"Yes": 0.5914638164276921, "yes": 0.39125298803765163}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4484840549796984, "res": {"yes": 0.5374417806051706, "Yes": 0.4484840549796984}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7226487484343611, "res": {"Yes": 0.7226487484343611, "yes": 0.271019149022269}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7790622781730941, "res": {"Yes": 0.7790622781730941, "yes": 0.2149623247350874}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8474337617577163, "res": {"Yes": 0.8474337617577163, "yes": 0.1490952970443989}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7655052390114347, "res": {"Yes": 0.7655052390114347, "yes": 0.2242698869329488}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.835773228892479, "res": {"Yes": 0.835773228892479, "yes": 0.15796540904796952}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7671514035719601, "res": {"Yes": 0.7671514035719601, "yes": 0.2188196072038995}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7312127148319622, "res": {"Yes": 0.7312127148319622, "yes": 0.24383485661126963}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7550278975639004, "res": {"Yes": 0.7550278975639004, "yes": 0.23414668524447108}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.630841270741896, "res": {"Yes": 0.630841270741896, "yes": 0.3524771787360289}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9124711777972083, "res": {"Yes": 0.9124711777972083, "yes": 0.0660128868970353}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6675284619441973, "res": {"Yes": 0.6675284619441973, "yes": 0.32535284781439966}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8492452713394358, "res": {"Yes": 0.8492452713394358, "yes": 0.14633402908780668}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5234955495734714, "res": {"Yes": 0.5234955495734714, "yes": 0.46369892773427535}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7516595602282167, "res": {"Yes": 0.7516595602282167, "yes": 0.24349392145375778}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8740590698234949, "res": {"Yes": 0.8740590698234949, "yes": 0.12001057681010402}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8220383750828796, "res": {"Yes": 0.8220383750828796, "yes": 0.1741951424247307}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6256768219846276, "res": {"Yes": 0.6256768219846276, "yes": 0.36615574621098146}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6371462231923342, "res": {"Yes": 0.6371462231923342, "yes": 0.35844774130012946}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7397364683961823, "res": {"Yes": 0.7397364683961823, "yes": 0.25559832748144246}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6125393032809584, "res": {"Yes": 0.6125393032809584, "yes": 0.38255367835233506}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8075652033197496, "res": {"Yes": 0.8075652033197496, "yes": 0.17231399743627032}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6579502884361237, "res": {"Yes": 0.6579502884361237, "yes": 0.3204790087776052}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.649796358366694, "res": {"Yes": 0.649796358366694, "yes": 0.32225864839177387}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7235606918312861, "res": {"Yes": 0.7235606918312861, "yes": 0.26000011647134447}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.677069129338499, "res": {"Yes": 0.677069129338499, "yes": 0.2874811747554675}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8637380837010468, "res": {"Yes": 0.8637380837010468, "yes": 0.1281461332822862}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8366125909030272, "res": {"Yes": 0.8366125909030272, "yes": 0.15073271650461}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8779684850602962, "res": {"Yes": 0.8779684850602962, "yes": 0.11365542449434039}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9451934816107785, "res": {"Yes": 0.9451934816107785, "yes": 0.048420596899884254}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.875717069708296, "res": {"Yes": 0.875717069708296, "yes": 0.11799102697412708}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9143642536037351, "res": {"Yes": 0.9143642536037351, "yes": 0.08334688858468285}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8580222048933265, "res": {"Yes": 0.8580222048933265, "yes": 0.13694983336934557}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.891821088455553, "res": {"Yes": 0.891821088455553, "yes": 0.1048396121479609}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8858445457813726, "res": {"Yes": 0.8858445457813726, "yes": 0.10701859227877121}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8901449360678999, "res": {"Yes": 0.8901449360678999, "yes": 0.10318484370951868}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8171568660852007, "res": {"Yes": 0.8171568660852007, "yes": 0.17806785383652976}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8744897811104899, "res": {"Yes": 0.8744897811104899, "yes": 0.1216141926973234}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8923887169438923, "res": {"Yes": 0.8923887169438923, "yes": 0.10330060976688069}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8375130360258962, "res": {"Yes": 0.8375130360258962, "yes": 0.15823868837112398}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8593386475969561, "res": {"Yes": 0.8593386475969561, "yes": 0.13817177797105581}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9258294782977, "res": {"Yes": 0.9258294782977, "yes": 0.06145826207588879}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7285637854054724, "res": {"Yes": 0.7285637854054724, "yes": 0.2630547483071949}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7488259866703993, "res": {"Yes": 0.7488259866703993, "yes": 0.24426250300347008}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7911815483817387, "res": {"Yes": 0.7911815483817387, "yes": 0.20299640691968995}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8696760004260103, "res": {"Yes": 0.8696760004260103, "yes": 0.1257386540896662}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8580811873668952, "res": {"Yes": 0.8580811873668952, "yes": 0.13372318490469953}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9332441869787861, "res": {"Yes": 0.9332441869787861, "yes": 0.06030590216634783}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8242052172663225, "res": {"Yes": 0.8242052172663225, "yes": 0.16258097396502344}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8788670624435978, "res": {"Yes": 0.8788670624435978, "yes": 0.11304907076200384}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7752627614965071, "res": {"Yes": 0.7752627614965071, "yes": 0.20682219834806154}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7641974658935693, "res": {"Yes": 0.7641974658935693, "yes": 0.21015632929445294}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8022340345753336, "res": {"Yes": 0.8022340345753336, "yes": 0.1792108201433306}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5756622855742253, "res": {"Yes": 0.5756622855742253, "yes": 0.40864309183831365}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8109245763443612, "res": {"Yes": 0.8109245763443612, "yes": 0.17739565536872326}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8042874168829324, "res": {"Yes": 0.8042874168829324, "yes": 0.1780259950047913}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8571988902547817, "res": {"Yes": 0.8571988902547817, "yes": 0.11541005465239806}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.811054415762402, "res": {"Yes": 0.811054415762402, "yes": 0.1728109597749301}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8135972616286077, "res": {"Yes": 0.8135972616286077, "yes": 0.1770740450977392}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.879632326159221, "res": {"Yes": 0.879632326159221, "yes": 0.11301171421762599}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7832676461553847, "res": {"Yes": 0.7832676461553847, "yes": 0.20801012969706284}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9005896261516158, "res": {"Yes": 0.9005896261516158, "yes": 0.09002345783519312}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5822778148736041, "res": {"Yes": 0.5822778148736041, "yes": 0.4075726173872667}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48992927256490826, "res": {"yes": 0.5038122500779855, "Yes": 0.48992927256490826}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.676275440206515, "res": {"Yes": 0.676275440206515, "yes": 0.31346245087690455}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9790559254564548, "res": {"Yes": 0.9790559254564548, "yes": 0.012789901049158598}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7794772081307296, "res": {"Yes": 0.7794772081307296, "yes": 0.2097968494612393}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5998698883691647, "res": {"Yes": 0.5998698883691647, "yes": 0.39285540933043783}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47207985187700346, "res": {"yes": 0.5193007416294064, "Yes": 0.47207985187700346}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9645801886190226, "res": {"Yes": 0.9645801886190226, "yes": 0.027766625529754327}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9595977948576607, "res": {"Yes": 0.9595977948576607, "yes": 0.033717320912444546}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8068243519329026, "res": {"Yes": 0.8068243519329026, "yes": 0.18484417613361187}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8061882926897168, "res": {"Yes": 0.8061882926897168, "yes": 0.1865343079462301}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8190026823350989, "res": {"Yes": 0.8190026823350989, "yes": 0.1735400969423854}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8557256384893727, "res": {"Yes": 0.8557256384893727, "yes": 0.13326010285647033}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8653696346777412, "res": {"Yes": 0.8653696346777412, "yes": 0.12520305746091254}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7302746811979396, "res": {"Yes": 0.7302746811979396, "yes": 0.26537732441884926}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.615062986098191, "res": {"Yes": 0.615062986098191, "yes": 0.3768596168420323}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7093727004315644, "res": {"Yes": 0.7093727004315644, "yes": 0.2847068569961359}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8629404532410353, "res": {"Yes": 0.8629404532410353, "yes": 0.13176308231258851}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7949538232317224, "res": {"Yes": 0.7949538232317224, "yes": 0.1972055880988164}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9033464080727254, "res": {"Yes": 0.9033464080727254, "yes": 0.08966982191718713}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.839795709247732, "res": {"Yes": 0.839795709247732, "yes": 0.15149709941372339}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8204069428972991, "res": {"Yes": 0.8204069428972991, "yes": 0.1685951500234238}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.907271159937761, "res": {"Yes": 0.907271159937761, "yes": 0.08052781711694292}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8766564644987934, "res": {"Yes": 0.8766564644987934, "yes": 0.11479037960022181}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8484063004891963, "res": {"Yes": 0.8484063004891963, "yes": 0.13655782878098788}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6694295146338156, "res": {"Yes": 0.6694295146338156, "yes": 0.3124736604243748}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7386883983994326, "res": {"Yes": 0.7386883983994326, "yes": 0.25020637541474694}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8441344804946626, "res": {"Yes": 0.8441344804946626, "yes": 0.13827283229383694}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8725142872220722, "res": {"Yes": 0.8725142872220722, "yes": 0.11143405478005139}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8331575408726971, "res": {"Yes": 0.8331575408726971, "yes": 0.15269544453183045}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7641159991443925, "res": {"Yes": 0.7641159991443925, "yes": 0.2268692088542734}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9718494824103805, "res": {"Yes": 0.9718494824103805, "yes": 0.014209625384905184}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7620010083518312, "res": {"Yes": 0.7620010083518312, "yes": 0.22818056767008496}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.894153122956745, "res": {"Yes": 0.894153122956745, "yes": 0.10221161070340337}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8074925903213964, "res": {"Yes": 0.8074925903213964, "yes": 0.18786666434195226}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7239957693881691, "res": {"Yes": 0.7239957693881691, "yes": 0.26219439098649056}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7497601901861748, "res": {"Yes": 0.7497601901861748, "yes": 0.24582855915777557}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6351677638140744, "res": {"Yes": 0.6351677638140744, "yes": 0.33685187005338735}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7370078025075956, "res": {"Yes": 0.7370078025075956, "yes": 0.2516996921310345}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7283633850744368, "res": {"Yes": 0.7283633850744368, "yes": 0.2635264332744417}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7290931626560263, "res": {"Yes": 0.7290931626560263, "yes": 0.26063518709966405}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7578361482234645, "res": {"Yes": 0.7578361482234645, "yes": 0.23000841126735594}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9717785477618228, "res": {"Yes": 0.9717785477618228, "yes": 0.01956101938374616}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7448806610081927, "res": {"Yes": 0.7448806610081927, "yes": 0.25030002016026626}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8564955643846704, "res": {"Yes": 0.8564955643846704, "yes": 0.1332146422653936}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8361068525583041, "res": {"Yes": 0.8361068525583041, "yes": 0.15376195795542744}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.790350832759618, "res": {"Yes": 0.790350832759618, "yes": 0.19757695993220484}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8092922246718041, "res": {"Yes": 0.8092922246718041, "yes": 0.18222252024693086}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6366104620557637, "res": {"Yes": 0.6366104620557637, "yes": 0.3506779493211472}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7821162986446785, "res": {"Yes": 0.7821162986446785, "yes": 0.2069770834119912}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9275496824048519, "res": {"Yes": 0.9275496824048519, "yes": 0.06885969846766155}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9415157588505461, "res": {"Yes": 0.9415157588505461, "yes": 0.048941087506237574}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8573171661447448, "res": {"Yes": 0.8573171661447448, "yes": 0.13400992695786942}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8102164700860188, "res": {"Yes": 0.8102164700860188, "yes": 0.17540266593258688}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8256202144482617, "res": {"Yes": 0.8256202144482617, "yes": 0.16430533226868976}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7770209393556005, "res": {"Yes": 0.7770209393556005, "yes": 0.2102542218895269}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9322940124898146, "res": {"Yes": 0.9322940124898146, "yes": 0.06210627179834118}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8872355001959022, "res": {"Yes": 0.8872355001959022, "yes": 0.10821519370671633}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8645929420816523, "res": {"Yes": 0.8645929420816523, "yes": 0.12399915846161605}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7814160160207503, "res": {"Yes": 0.7814160160207503, "yes": 0.19902566355194787}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7537662291212976, "res": {"Yes": 0.7537662291212976, "yes": 0.2188784775959501}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8279913429714709, "res": {"Yes": 0.8279913429714709, "yes": 0.1563840403305619}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9270371710501198, "res": {"Yes": 0.9270371710501198, "yes": 0.061557277262119174}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9250110181751047, "res": {"Yes": 0.9250110181751047, "yes": 0.067246621478506}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7882616350948991, "res": {"Yes": 0.7882616350948991, "yes": 0.20836002627526992}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7059789484450244, "res": {"Yes": 0.7059789484450244, "yes": 0.2889216889733256}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6968819160543334, "res": {"Yes": 0.6968819160543334, "yes": 0.2991680270666971}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8309771512434031, "res": {"Yes": 0.8309771512434031, "yes": 0.1630429362965796}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8078921557301835, "res": {"Yes": 0.8078921557301835, "yes": 0.18847658449419225}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6423707177303749, "res": {"Yes": 0.6423707177303749, "yes": 0.3521458650052783}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8422717107015775, "res": {"Yes": 0.8422717107015775, "yes": 0.152175518096214}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5477242018600134, "res": {"Yes": 0.5477242018600134, "yes": 0.434351658471437}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8482902125139578, "res": {"Yes": 0.8482902125139578, "yes": 0.13716848379866284}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7506700584795892, "res": {"Yes": 0.7506700584795892, "yes": 0.23914022108280894}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8661215866380617, "res": {"Yes": 0.8661215866380617, "yes": 0.12053112025705462}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9496642575100599, "res": {"Yes": 0.9496642575100599, "yes": 0.043774929995523425}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7653610237555774, "res": {"Yes": 0.7653610237555774, "yes": 0.21801306212139684}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7907629816882952, "res": {"Yes": 0.7907629816882952, "yes": 0.2007796180603352}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.89930892655156, "res": {"Yes": 0.89930892655156, "yes": 0.08531786577922866}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9292504309906876, "res": {"Yes": 0.9292504309906876, "yes": 0.06458931398923273}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9400743115521915, "res": {"Yes": 0.9400743115521915, "yes": 0.05520966778510372}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7911111047128695, "res": {"Yes": 0.7911111047128695, "yes": 0.20054355979434485}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8716535317524474, "res": {"Yes": 0.8716535317524474, "yes": 0.11877294900021802}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7865967112200262, "res": {"Yes": 0.7865967112200262, "yes": 0.19879121020454396}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9385651932480374, "res": {"Yes": 0.9385651932480374, "yes": 0.056672726391311705}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9355578505528489, "res": {"Yes": 0.9355578505528489, "yes": 0.05461509332000005}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8941346374261666, "res": {"Yes": 0.8941346374261666, "yes": 0.09582556227548916}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8901270932914863, "res": {"Yes": 0.8901270932914863, "yes": 0.09918846020863357}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8610119805146954, "res": {"Yes": 0.8610119805146954, "yes": 0.12817394401056337}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4924888294239057, "res": {"yes": 0.49735411577912003, "Yes": 0.4924888294239057}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.651482952042883, "res": {"Yes": 0.651482952042883, "yes": 0.3390331461933236}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8092352363199261, "res": {"Yes": 0.8092352363199261, "yes": 0.18298181548729864}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9014202805793888, "res": {"Yes": 0.9014202805793888, "yes": 0.09364848757333231}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8828433064626697, "res": {"Yes": 0.8828433064626697, "yes": 0.11305385283883727}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8195563759982208, "res": {"Yes": 0.8195563759982208, "yes": 0.17374040831560378}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.847846342068439, "res": {"Yes": 0.847846342068439, "yes": 0.14367899056939654}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8331520837086765, "res": {"Yes": 0.8331520837086765, "yes": 0.16108670799392968}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9014208755169704, "res": {"Yes": 0.9014208755169704, "yes": 0.09550387083626058}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7898116339420331, "res": {"Yes": 0.7898116339420331, "yes": 0.20535280070141335}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7961190054326783, "res": {"Yes": 0.7961190054326783, "yes": 0.1996296857422172}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7328118242793561, "res": {"Yes": 0.7328118242793561, "yes": 0.26183537824775716}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6771830084004932, "res": {"Yes": 0.6771830084004932, "yes": 0.3114600809531191}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8334809604030025, "res": {"Yes": 0.8334809604030025, "yes": 0.15898799225858046}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8744153127316392, "res": {"Yes": 0.8744153127316392, "yes": 0.11774630813445808}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6790190336032846, "res": {"Yes": 0.6790190336032846, "yes": 0.3080857887159903}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.772841111027046, "res": {"Yes": 0.772841111027046, "yes": 0.21836165999400486}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7550321710338945, "res": {"Yes": 0.7550321710338945, "yes": 0.2320320431099149}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7399171932935223, "res": {"Yes": 0.7399171932935223, "yes": 0.24968384036251126}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6909816854359097, "res": {"Yes": 0.6909816854359097, "yes": 0.2914226966372637}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8447130965403908, "res": {"Yes": 0.8447130965403908, "yes": 0.14829156639482508}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.43093750473794934, "res": {"yes": 0.5088426732967418, "Yes": 0.43093750473794934}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4070987475478679, "res": {"yes": 0.5012244621442086, "Yes": 0.4070987475478679}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45083322374911255, "res": {"yes": 0.4997388784881651, "Yes": 0.45083322374911255}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7799809364857935, "res": {"Yes": 0.7799809364857935, "yes": 0.21195679971591178}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7552902247159212, "res": {"Yes": 0.7552902247159212, "yes": 0.23581018462917666}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7732102617062984, "res": {"Yes": 0.7732102617062984, "yes": 0.2186220585610696}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7739469637708895, "res": {"Yes": 0.7739469637708895, "yes": 0.19703046845401345}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8102237458625884, "res": {"Yes": 0.8102237458625884, "yes": 0.18287646659788387}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.779422467368647, "res": {"Yes": 0.779422467368647, "yes": 0.21295263689374594}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6668725838669827, "res": {"Yes": 0.6668725838669827, "yes": 0.308674609689454}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8363482714537784, "res": {"Yes": 0.8363482714537784, "yes": 0.1553167498261184}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.832456184286759, "res": {"Yes": 0.832456184286759, "yes": 0.15729826482035691}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7856754463161117, "res": {"Yes": 0.7856754463161117, "yes": 0.2023850111285635}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8819696711657171, "res": {"Yes": 0.8819696711657171, "yes": 0.11063722204433528}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.981548285359379, "res": {"Yes": 0.981548285359379, "yes": 0.012953179496586512}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.97568768368807, "res": {"Yes": 0.97568768368807, "yes": 0.02023504250678457}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8100245282370504, "res": {"Yes": 0.8100245282370504, "yes": 0.18295916374071333}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9245567227317997, "res": {"Yes": 0.9245567227317997, "yes": 0.0708418252917372}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9124659128537018, "res": {"Yes": 0.9124659128537018, "yes": 0.08169722854740001}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8936876854304084, "res": {"Yes": 0.8936876854304084, "yes": 0.10093558882357703}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8741142771377736, "res": {"Yes": 0.8741142771377736, "yes": 0.12088417780800811}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8847718445317873, "res": {"Yes": 0.8847718445317873, "yes": 0.11126798589909885}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9209308840549246, "res": {"Yes": 0.9209308840549246, "yes": 0.07247130417177766}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9354327617336696, "res": {"Yes": 0.9354327617336696, "yes": 0.05931719789096384}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8799397673901378, "res": {"Yes": 0.8799397673901378, "yes": 0.1103396437279258}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9003053728873279, "res": {"Yes": 0.9003053728873279, "yes": 0.08971662383982318}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8717535510166761, "res": {"Yes": 0.8717535510166761, "yes": 0.12006170020072067}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8712879584949363, "res": {"Yes": 0.8712879584949363, "yes": 0.1231651058176586}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8502695433014825, "res": {"Yes": 0.8502695433014825, "yes": 0.14034526694086424}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9748369380466906, "res": {"Yes": 0.9748369380466906, "yes": 0.020426346832875142}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5457212307568625, "res": {"Yes": 0.5457212307568625, "yes": 0.43095400996045374}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7746638931158671, "res": {"Yes": 0.7746638931158671, "yes": 0.21691877592590614}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7846379247197072, "res": {"Yes": 0.7846379247197072, "yes": 0.20333789018258136}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9487172689479516, "res": {"Yes": 0.9487172689479516, "yes": 0.041002099757157205}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7235678551175935, "res": {"Yes": 0.7235678551175935, "yes": 0.24245178817978588}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.929513615249, "res": {"Yes": 0.929513615249, "yes": 0.05742286557578863}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8963453867505554, "res": {"Yes": 0.8963453867505554, "yes": 0.09010244303766339}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8798613154579165, "res": {"Yes": 0.8798613154579165, "yes": 0.1059942215537352}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9299458782112796, "res": {"Yes": 0.9299458782112796, "yes": 0.060800364193699726}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5320819287966732, "res": {"Yes": 0.5320819287966732, "yes": 0.4595734694400358}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9733401246812677, "res": {"Yes": 0.9733401246812677, "yes": 0.021855545303627023}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5856539458104036, "res": {"Yes": 0.5856539458104036, "yes": 0.4051417391062692}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7410118217756634, "res": {"Yes": 0.7410118217756634, "yes": 0.24945181716882012}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9749083133459491, "res": {"Yes": 0.9749083133459491, "yes": 0.01993220726523492}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4091323832021532, "res": {"yes": 0.5763977029680192, "Yes": 0.4091323832021532}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5735534770168145, "res": {"Yes": 0.5735534770168145, "yes": 0.41575009184296846}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5905856543067006, "res": {"Yes": 0.5905856543067006, "yes": 0.3929277996083565}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6689656068273686, "res": {"Yes": 0.6689656068273686, "yes": 0.3112471773498012}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7051274093612393, "res": {"Yes": 0.7051274093612393, "yes": 0.28155927082873977}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7598929734700178, "res": {"Yes": 0.7598929734700178, "yes": 0.23080056415625233}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9399835113396188, "res": {"Yes": 0.9399835113396188, "yes": 0.05898199801643532}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8355948270668634, "res": {"Yes": 0.8355948270668634, "yes": 0.15935899366801992}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9185630703496265, "res": {"Yes": 0.9185630703496265, "yes": 0.07889340826955099}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9351564195959221, "res": {"Yes": 0.9351564195959221, "yes": 0.06164985508394079}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.734500134683959, "res": {"Yes": 0.734500134683959, "yes": 0.2482345782101504}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6444463196412066, "res": {"Yes": 0.6444463196412066, "yes": 0.3445238362726308}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8438538186827447, "res": {"Yes": 0.8438538186827447, "yes": 0.1415813663436512}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7956767170435918, "res": {"Yes": 0.7956767170435918, "yes": 0.19575866097636938}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9430828021929065, "res": {"Yes": 0.9430828021929065, "yes": 0.04888617217978103}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6025624906949515, "res": {"Yes": 0.6025624906949515, "yes": 0.38970270321699907}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.4273916812088545, "res": {"yes": 0.5622315237243238, "Yes": 0.4273916812088545}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3705440677586722, "res": {"yes": 0.6159415354109623, "Yes": 0.3705440677586722}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812306056730815, "res": {"Yes": 0.9812306056730815, "yes": 0.015086298970532253}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.519753197638663, "res": {"Yes": 0.519753197638663, "yes": 0.46894658888285673}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8156742558339509, "res": {"Yes": 0.8156742558339509, "yes": 0.17490050667861476}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7314134895297738, "res": {"Yes": 0.7314134895297738, "yes": 0.2409915520115181}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7537644502350961, "res": {"Yes": 0.7537644502350961, "yes": 0.21985683378187454}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9006250200194084, "res": {"Yes": 0.9006250200194084, "yes": 0.0809491094624935}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8688616868182326, "res": {"Yes": 0.8688616868182326, "yes": 0.12718172624402613}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.903782712283421, "res": {"Yes": 0.903782712283421, "yes": 0.08793564841694927}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9140507407729668, "res": {"Yes": 0.9140507407729668, "yes": 0.0757594340828165}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8820281829745175, "res": {"Yes": 0.8820281829745175, "yes": 0.10739504864198918}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9238654396774089, "res": {"Yes": 0.9238654396774089, "yes": 0.06228849133678499}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.75741362841045, "res": {"Yes": 0.75741362841045, "yes": 0.22801380582428207}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7101961922538385, "res": {"Yes": 0.7101961922538385, "yes": 0.2732595781065045}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8425056167603329, "res": {"Yes": 0.8425056167603329, "yes": 0.1462320844296775}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7885284595121617, "res": {"Yes": 0.7885284595121617, "yes": 0.20328446000607406}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8368347828767752, "res": {"Yes": 0.8368347828767752, "yes": 0.1509831255101949}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8713829079169988, "res": {"Yes": 0.8713829079169988, "yes": 0.12124142176466327}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6616957174677187, "res": {"Yes": 0.6616957174677187, "yes": 0.33523386038753133}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.970415734535031, "res": {"Yes": 0.970415734535031, "yes": 0.02688872298930336}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9668649809594015, "res": {"Yes": 0.9668649809594015, "yes": 0.025710592820413068}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9146203608306666, "res": {"Yes": 0.9146203608306666, "yes": 0.07857455183293319}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4450526451968923, "res": {"yes": 0.5444735578757317, "Yes": 0.4450526451968923}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9535067927025713, "res": {"Yes": 0.9535067927025713, "yes": 0.042462166479409716}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9223734629589071, "res": {"Yes": 0.9223734629589071, "yes": 0.07304940397180712}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9382144794346855, "res": {"Yes": 0.9382144794346855, "yes": 0.057353179308699855}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9519406581591947, "res": {"Yes": 0.9519406581591947, "yes": 0.04320669149541405}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9711550691287646, "res": {"Yes": 0.9711550691287646, "yes": 0.025130935542645182}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7089634679082695, "res": {"Yes": 0.7089634679082695, "yes": 0.2858563405869669}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6240889142364577, "res": {"Yes": 0.6240889142364577, "yes": 0.3685852318747819}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39280750353438526, "res": {"yes": 0.584894225037753, "Yes": 0.39280750353438526}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5245498881683871, "res": {"Yes": 0.5245498881683871, "yes": 0.4663090207390276}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6219747387561001, "res": {"Yes": 0.6219747387561001, "yes": 0.37140339092107527}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7536285587557037, "res": {"Yes": 0.7536285587557037, "yes": 0.23634052769375047}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.625068378268782, "res": {"Yes": 0.625068378268782, "yes": 0.3669310218335011}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8207068071983794, "res": {"Yes": 0.8207068071983794, "yes": 0.17368539344035555}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8615194554863735, "res": {"Yes": 0.8615194554863735, "yes": 0.1333877338337551}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8270089183817223, "res": {"Yes": 0.8270089183817223, "yes": 0.1573702767148301}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9142261584560788, "res": {"Yes": 0.9142261584560788, "yes": 0.0803529478383418}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8511715451956363, "res": {"Yes": 0.8511715451956363, "yes": 0.142054427864379}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9747201566525222, "res": {"Yes": 0.9747201566525222, "yes": 0.013935970161271998}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8972501641558688, "res": {"Yes": 0.8972501641558688, "yes": 0.09593429969356405}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9427978041460849, "res": {"Yes": 0.9427978041460849, "yes": 0.04946822671742541}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9429061067215306, "res": {"Yes": 0.9429061067215306, "yes": 0.05137672654577593}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8893847566200174, "res": {"Yes": 0.8893847566200174, "yes": 0.10823469584171186}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9143876835735529, "res": {"Yes": 0.9143876835735529, "yes": 0.07881548431206772}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9107922125043492, "res": {"Yes": 0.9107922125043492, "yes": 0.0837502603186001}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9060545455047603, "res": {"Yes": 0.9060545455047603, "yes": 0.08940382611603073}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9181698625693542, "res": {"Yes": 0.9181698625693542, "yes": 0.06532769585999239}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9076556794068255, "res": {"Yes": 0.9076556794068255, "yes": 0.0864557376048191}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8258922763559733, "res": {"Yes": 0.8258922763559733, "yes": 0.1659994506447576}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9632754835742204, "res": {"Yes": 0.9632754835742204, "yes": 0.03163086166558719}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8751704864032224, "res": {"Yes": 0.8751704864032224, "yes": 0.11097486996645586}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8190199552838119, "res": {"Yes": 0.8190199552838119, "yes": 0.17309830653635916}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6477920011736353, "res": {"Yes": 0.6477920011736353, "yes": 0.34461373426819786}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7984156918957341, "res": {"Yes": 0.7984156918957341, "yes": 0.19236385927519317}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7985546283132418, "res": {"Yes": 0.7985546283132418, "yes": 0.1961681067216274}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9664277784090799, "res": {"Yes": 0.9664277784090799, "yes": 0.030112015525123905}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7844767217270716, "res": {"Yes": 0.7844767217270716, "yes": 0.2057472510800487}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7394593410645223, "res": {"Yes": 0.7394593410645223, "yes": 0.25490455449194593}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6315782711867453, "res": {"Yes": 0.6315782711867453, "yes": 0.35811368038953956}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.752084270105644, "res": {"Yes": 0.752084270105644, "yes": 0.24046358851604646}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6991172578210283, "res": {"Yes": 0.6991172578210283, "yes": 0.2891219521847925}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8492604220102201, "res": {"Yes": 0.8492604220102201, "yes": 0.14253127922890968}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9285457652116691, "res": {"Yes": 0.9285457652116691, "yes": 0.06558533892546585}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9509712359533551, "res": {"Yes": 0.9509712359533551, "yes": 0.04381811336240031}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9443343757076739, "res": {"Yes": 0.9443343757076739, "yes": 0.05003476075520326}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8881919894705799, "res": {"Yes": 0.8881919894705799, "yes": 0.10409065258959167}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9743297882659491, "res": {"Yes": 0.9743297882659491, "yes": 0.02052882054332151}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6581781760774768, "res": {"Yes": 0.6581781760774768, "yes": 0.3351270718919225}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7367878385098071, "res": {"Yes": 0.7367878385098071, "yes": 0.2607954227210181}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9783750194277245, "res": {"Yes": 0.9783750194277245, "yes": 0.01925576658983695}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7213321053970134, "res": {"Yes": 0.7213321053970134, "yes": 0.2725709314095604}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9529225436203792, "res": {"Yes": 0.9529225436203792, "yes": 0.04301756320724918}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8995500186202432, "res": {"Yes": 0.8995500186202432, "yes": 0.09360107598485258}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9481200061243763, "res": {"Yes": 0.9481200061243763, "yes": 0.04758582291630638}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489457608592774, "res": {"Yes": 0.9489457608592774, "yes": 0.04708105444326799}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9460585184658925, "res": {"Yes": 0.9460585184658925, "yes": 0.04811913807629617}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.640618457109505, "res": {"Yes": 0.640618457109505, "yes": 0.34469607232420374}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7666903307605073, "res": {"Yes": 0.7666903307605073, "yes": 0.19228591000051648}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8290314063039241, "res": {"Yes": 0.8290314063039241, "yes": 0.1625180510495176}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7698144478439709, "res": {"Yes": 0.7698144478439709, "yes": 0.22036800061860848}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8409686472711619, "res": {"Yes": 0.8409686472711619, "yes": 0.15285338270656373}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8384609919746536, "res": {"Yes": 0.8384609919746536, "yes": 0.14647128156712352}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8336791774066533, "res": {"Yes": 0.8336791774066533, "yes": 0.15412207434959022}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8113190735473655, "res": {"Yes": 0.8113190735473655, "yes": 0.17888454164472894}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7529623367721723, "res": {"Yes": 0.7529623367721723, "yes": 0.2396118875075894}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8652909848059429, "res": {"Yes": 0.8652909848059429, "yes": 0.12744775029284938}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7876962208393963, "res": {"Yes": 0.7876962208393963, "yes": 0.20528500440040703}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8490306517208016, "res": {"Yes": 0.8490306517208016, "yes": 0.14478704083955682}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.857744250729947, "res": {"Yes": 0.857744250729947, "yes": 0.13478333712187932}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8956146201243814, "res": {"Yes": 0.8956146201243814, "yes": 0.09664790980473366}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8782052524021464, "res": {"Yes": 0.8782052524021464, "yes": 0.11374138048252773}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6784682603137459, "res": {"Yes": 0.6784682603137459, "yes": 0.3172574467023744}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5606965101313632, "res": {"Yes": 0.5606965101313632, "yes": 0.4343011202528278}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7773328515266301, "res": {"Yes": 0.7773328515266301, "yes": 0.21611420157954164}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8425777130505548, "res": {"Yes": 0.8425777130505548, "yes": 0.15158755502615529}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7168362117306434, "res": {"Yes": 0.7168362117306434, "yes": 0.2706241827747281}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.905365526628053, "res": {"Yes": 0.905365526628053, "yes": 0.0865253538413131}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.873281084144751, "res": {"Yes": 0.873281084144751, "yes": 0.1095395727539268}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8933621868091287, "res": {"Yes": 0.8933621868091287, "yes": 0.0961668283904872}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9276329847445076, "res": {"Yes": 0.9276329847445076, "yes": 0.051197969803409786}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8733055538235474, "res": {"Yes": 0.8733055538235474, "yes": 0.11701158874346193}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8510035915754363, "res": {"Yes": 0.8510035915754363, "yes": 0.14399400675920565}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9591570472843984, "res": {"Yes": 0.9591570472843984, "yes": 0.03450269645096551}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685223494947003, "res": {"Yes": 0.9685223494947003, "yes": 0.02260356142369228}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7033908410476118, "res": {"Yes": 0.7033908410476118, "yes": 0.291386853849954}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9498943871391746, "res": {"Yes": 0.9498943871391746, "yes": 0.04422397129142181}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.788423079725709, "res": {"Yes": 0.788423079725709, "yes": 0.1957467004875804}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6262740776354914, "res": {"Yes": 0.6262740776354914, "yes": 0.3604977729305757}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6303105780752375, "res": {"Yes": 0.6303105780752375, "yes": 0.3602223497491011}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6652428879329834, "res": {"Yes": 0.6652428879329834, "yes": 0.3261016618707708}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8466777984064005, "res": {"Yes": 0.8466777984064005, "yes": 0.14690691853627438}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9559126733224932, "res": {"Yes": 0.9559126733224932, "yes": 0.03970184971726301}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9485725484241876, "res": {"Yes": 0.9485725484241876, "yes": 0.044653052898967135}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9470457400644539, "res": {"Yes": 0.9470457400644539, "yes": 0.04457074842677887}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9658599524882477, "res": {"Yes": 0.9658599524882477, "yes": 0.02867001430882381}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9568864850282742, "res": {"Yes": 0.9568864850282742, "yes": 0.034902521608195784}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.923854041095932, "res": {"Yes": 0.923854041095932, "yes": 0.05821245172285936}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8737244217224581, "res": {"Yes": 0.8737244217224581, "yes": 0.11357005588537834}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8807954141916235, "res": {"Yes": 0.8807954141916235, "yes": 0.10297908187191218}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8627155743770692, "res": {"Yes": 0.8627155743770692, "yes": 0.12249614731276177}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9433877180798544, "res": {"Yes": 0.9433877180798544, "yes": 0.04879818302063094}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7222855642025281, "res": {"Yes": 0.7222855642025281, "yes": 0.27412959772996676}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7580379717025989, "res": {"Yes": 0.7580379717025989, "yes": 0.2380527479083301}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7961664714627306, "res": {"Yes": 0.7961664714627306, "yes": 0.19813596342153517}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9057862843534518, "res": {"Yes": 0.9057862843534518, "yes": 0.08846387521158064}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8380344663918159, "res": {"Yes": 0.8380344663918159, "yes": 0.15668994482410506}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9650445875456864, "res": {"Yes": 0.9650445875456864, "yes": 0.025368571845460007}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9687009614834663, "res": {"Yes": 0.9687009614834663, "yes": 0.02496730774716977}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7770968735518787, "res": {"Yes": 0.7770968735518787, "yes": 0.20799613108639275}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7145067460637744, "res": {"Yes": 0.7145067460637744, "yes": 0.2742841132889577}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.934204625155429, "res": {"Yes": 0.934204625155429, "yes": 0.05289645265117975}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7909928582628969, "res": {"Yes": 0.7909928582628969, "yes": 0.19932083825576244}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8254585243078922, "res": {"Yes": 0.8254585243078922, "yes": 0.16183957697064205}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8767422144611197, "res": {"Yes": 0.8767422144611197, "yes": 0.11701251313866436}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8868702384410639, "res": {"Yes": 0.8868702384410639, "yes": 0.10169291947832948}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9190084701187404, "res": {"Yes": 0.9190084701187404, "yes": 0.07001605542627519}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.972789183273421, "res": {"Yes": 0.972789183273421, "yes": 0.020527541637639982}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9300779445525335, "res": {"Yes": 0.9300779445525335, "yes": 0.055225426873004475}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9636444116528526, "res": {"Yes": 0.9636444116528526, "yes": 0.029438527598797122}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.973119492511374, "res": {"Yes": 0.973119492511374, "yes": 0.020633547672681673}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3498785096649914, "res": {"yes": 0.6443706725279381, "Yes": 0.3498785096649914}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7862349442173387, "res": {"Yes": 0.7862349442173387, "yes": 0.20264198046890347}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8845593124742299, "res": {"Yes": 0.8845593124742299, "yes": 0.10474007228432447}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8758212249328641, "res": {"Yes": 0.8758212249328641, "yes": 0.1149112256035702}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9376964972812262, "res": {"Yes": 0.9376964972812262, "yes": 0.05617317214639955}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8994438240138358, "res": {"Yes": 0.8994438240138358, "yes": 0.0986493337708077}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5853345587037174, "res": {"Yes": 0.5853345587037174, "yes": 0.3991840461090998}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5674523488137557, "res": {"Yes": 0.5674523488137557, "yes": 0.4146754553507335}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49180976193346504, "res": {"Yes": 0.49180976193346504, "yes": 0.48475511164753315}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28594153847031595, "res": {"yes": 0.7024652325717667, "Yes": 0.28594153847031595}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5072334386979714, "res": {"Yes": 0.5072334386979714, "yes": 0.48337172710538423}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8478912451997394, "res": {"Yes": 0.8478912451997394, "yes": 0.14578935107053184}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9419316485161496, "res": {"Yes": 0.9419316485161496, "yes": 0.05366803532718256}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7601150314378751, "res": {"Yes": 0.7601150314378751, "yes": 0.23173089868598612}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8140960438110478, "res": {"Yes": 0.8140960438110478, "yes": 0.1770682371643106}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8505563620753169, "res": {"Yes": 0.8505563620753169, "yes": 0.1341627585547972}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8250963430591588, "res": {"Yes": 0.8250963430591588, "yes": 0.15130978310009355}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6457265771390566, "res": {"Yes": 0.6457265771390566, "yes": 0.3296661840440972}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7099925513912829, "res": {"Yes": 0.7099925513912829, "yes": 0.2730429145492769}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8635036969928452, "res": {"Yes": 0.8635036969928452, "yes": 0.12092911880866691}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7807218566706414, "res": {"Yes": 0.7807218566706414, "yes": 0.20324455918296877}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5309321997389217, "res": {"Yes": 0.5309321997389217, "yes": 0.4589557861646281}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8392553906224447, "res": {"Yes": 0.8392553906224447, "yes": 0.15457694202100908}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8604896339033707, "res": {"Yes": 0.8604896339033707, "yes": 0.1322767447441458}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8951586391207809, "res": {"Yes": 0.8951586391207809, "yes": 0.10092187260909384}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8405827900773631, "res": {"Yes": 0.8405827900773631, "yes": 0.1536193944943835}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8784970670052505, "res": {"Yes": 0.8784970670052505, "yes": 0.10724321356063388}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8348314339665855, "res": {"Yes": 0.8348314339665855, "yes": 0.1566550695241521}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.819509457738743, "res": {"Yes": 0.819509457738743, "yes": 0.16925779705145838}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8850120218988261, "res": {"Yes": 0.8850120218988261, "yes": 0.10131943806550477}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9758898388208048, "res": {"Yes": 0.9758898388208048, "yes": 0.023830233246119013}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8802996219246791, "res": {"Yes": 0.8802996219246791, "yes": 0.11333239834172801}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.956381387104935, "res": {"Yes": 0.956381387104935, "yes": 0.03976528434355348}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6554147222303489, "res": {"Yes": 0.6554147222303489, "yes": 0.33584511250094545}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8041292934366502, "res": {"Yes": 0.8041292934366502, "yes": 0.19214094264125692}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.863804343590813, "res": {"Yes": 0.863804343590813, "yes": 0.13321640071027716}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8613220953733781, "res": {"Yes": 0.8613220953733781, "yes": 0.13241708539973762}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7612106519725099, "res": {"Yes": 0.7612106519725099, "yes": 0.2330005418226597}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8772312573318521, "res": {"Yes": 0.8772312573318521, "yes": 0.11687463667981753}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273140637016107, "res": {"Yes": 0.9273140637016107, "yes": 0.06712464599396087}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9719722250319338, "res": {"Yes": 0.9719722250319338, "yes": 0.0190836315168663}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8577825927548924, "res": {"Yes": 0.8577825927548924, "yes": 0.13418487039937302}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7198376777202016, "res": {"Yes": 0.7198376777202016, "yes": 0.2735940063744024}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7802309916493747, "res": {"Yes": 0.7802309916493747, "yes": 0.2138210133802216}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8146010457579413, "res": {"Yes": 0.8146010457579413, "yes": 0.17961054698095397}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7398939602584136, "res": {"Yes": 0.7398939602584136, "yes": 0.2498722729195045}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9825427546522758, "res": {"Yes": 0.9825427546522758, "yes": 0.014680429481364249}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9770091876919493, "res": {"Yes": 0.9770091876919493, "yes": 0.021079501575805768}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8521617438249987, "res": {"Yes": 0.8521617438249987, "yes": 0.14042559548241182}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9773526140072945, "res": {"Yes": 0.9773526140072945, "yes": 0.015529968036521437}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6395981758776885, "res": {"Yes": 0.6395981758776885, "yes": 0.35624164510896467}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9257574423085901, "res": {"Yes": 0.9257574423085901, "yes": 0.06823845614196852}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9227167050789076, "res": {"Yes": 0.9227167050789076, "yes": 0.06752048808321238}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8496512821245359, "res": {"Yes": 0.8496512821245359, "yes": 0.1416774335619559}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9415256928357244, "res": {"Yes": 0.9415256928357244, "yes": 0.052589010864022866}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.91385368170956, "res": {"Yes": 0.91385368170956, "yes": 0.08046266427268149}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8847187562747978, "res": {"Yes": 0.8847187562747978, "yes": 0.10780188872722618}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9136448589347251, "res": {"Yes": 0.9136448589347251, "yes": 0.07607562253941888}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9071138799709705, "res": {"Yes": 0.9071138799709705, "yes": 0.08571219198168385}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8894956609195052, "res": {"Yes": 0.8894956609195052, "yes": 0.10422474519262366}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8887930235310703, "res": {"Yes": 0.8887930235310703, "yes": 0.10145684939566932}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8230962952381503, "res": {"Yes": 0.8230962952381503, "yes": 0.17134320279367254}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5209538087439419, "res": {"Yes": 0.5209538087439419, "yes": 0.4687687980476251}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5107371013168871, "res": {"Yes": 0.5107371013168871, "yes": 0.48189795450068734}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8419029606631747, "res": {"Yes": 0.8419029606631747, "yes": 0.15259619422356768}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5078704924639358, "res": {"Yes": 0.5078704924639358, "yes": 0.48781331380480203}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8702383668286395, "res": {"Yes": 0.8702383668286395, "yes": 0.12481190936444675}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9821687619960414, "res": {"Yes": 0.9821687619960414, "yes": 0.015581756565460405}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9407625228850555, "res": {"Yes": 0.9407625228850555, "yes": 0.05390407505155849}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8740092673565821, "res": {"Yes": 0.8740092673565821, "yes": 0.12281685708634778}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8870702857131249, "res": {"Yes": 0.8870702857131249, "yes": 0.10762306451461404}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.960996455849918, "res": {"Yes": 0.960996455849918, "yes": 0.03460433208455483}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756770126502282, "res": {"Yes": 0.9756770126502282, "yes": 0.01656295009595692}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9473250981940197, "res": {"Yes": 0.9473250981940197, "yes": 0.04840791236524307}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9760810626735725, "res": {"Yes": 0.9760810626735725, "yes": 0.021270907179953086}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9588350509379897, "res": {"Yes": 0.9588350509379897, "yes": 0.03375214864953902}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7245586993547015, "res": {"Yes": 0.7245586993547015, "yes": 0.2716916487631387}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.958650649215003, "res": {"Yes": 0.958650649215003, "yes": 0.0332489031631105}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7631274527771055, "res": {"Yes": 0.7631274527771055, "yes": 0.23104804571016235}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9759945242347482, "res": {"Yes": 0.9759945242347482, "yes": 0.01748274298022616}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9624263608065871, "res": {"Yes": 0.9624263608065871, "yes": 0.0312542628782191}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7709136570129642, "res": {"Yes": 0.7709136570129642, "yes": 0.22332807507113828}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8920299105121657, "res": {"Yes": 0.8920299105121657, "yes": 0.1014677565932307}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8923546408389663, "res": {"Yes": 0.8923546408389663, "yes": 0.10025136469203334}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.867576948199767, "res": {"Yes": 0.867576948199767, "yes": 0.1273658022571745}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6881982696050041, "res": {"Yes": 0.6881982696050041, "yes": 0.30411459237537025}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9623588018097995, "res": {"Yes": 0.9623588018097995, "yes": 0.027334014499037912}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.977357400114764, "res": {"Yes": 0.977357400114764, "yes": 0.017363881604747368}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9715728119558679, "res": {"Yes": 0.9715728119558679, "yes": 0.021516906586531846}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7478166360722227, "res": {"Yes": 0.7478166360722227, "yes": 0.24830287693667516}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7905232586960893, "res": {"Yes": 0.7905232586960893, "yes": 0.20284200570305708}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7606811477894435, "res": {"Yes": 0.7606811477894435, "yes": 0.2362580592411674}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6969173185548848, "res": {"Yes": 0.6969173185548848, "yes": 0.2985769471416086}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8607076235348599, "res": {"Yes": 0.8607076235348599, "yes": 0.13404606090481447}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8612121029522936, "res": {"Yes": 0.8612121029522936, "yes": 0.13513040551656888}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7774922055461715, "res": {"Yes": 0.7774922055461715, "yes": 0.2157519307594021}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8239473885155402, "res": {"Yes": 0.8239473885155402, "yes": 0.17267912076659486}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7519445834262739, "res": {"Yes": 0.7519445834262739, "yes": 0.2443333249682386}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9815301504407895, "res": {"Yes": 0.9815301504407895, "yes": 0.011170917830821164}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8589838569628689, "res": {"Yes": 0.8589838569628689, "yes": 0.1372857031747917}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9514990382679912, "res": {"Yes": 0.9514990382679912, "yes": 0.04041069505360321}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8935877320527899, "res": {"Yes": 0.8935877320527899, "yes": 0.09315181475987983}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8793298022657362, "res": {"Yes": 0.8793298022657362, "yes": 0.10780540312608307}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9117577036757548, "res": {"Yes": 0.9117577036757548, "yes": 0.07935320834483069}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9610237610301273, "res": {"Yes": 0.9610237610301273, "yes": 0.032937376572897074}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9342133693516432, "res": {"Yes": 0.9342133693516432, "yes": 0.05102429434373433}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8843770058979039, "res": {"Yes": 0.8843770058979039, "yes": 0.10892488094271245}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9122174548444748, "res": {"Yes": 0.9122174548444748, "yes": 0.07907690650466043}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8751988773943006, "res": {"Yes": 0.8751988773943006, "yes": 0.11616343001216073}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9195870531283636, "res": {"Yes": 0.9195870531283636, "yes": 0.07388493709629866}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8801816784801351, "res": {"Yes": 0.8801816784801351, "yes": 0.10572071776152879}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9502137383088117, "res": {"Yes": 0.9502137383088117, "yes": 0.03485527814553558}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6594416149554791, "res": {"Yes": 0.6594416149554791, "yes": 0.24375294186189678}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5068096447669879, "res": {"Yes": 0.5068096447669879, "yes": 0.4059479741820858}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.547266933588431, "res": {"Yes": 0.547266933588431, "yes": 0.36220946568927426}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.649175553757717, "res": {"Yes": 0.649175553757717, "yes": 0.28956357595986254}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.845090590228717, "res": {"Yes": 0.845090590228717, "yes": 0.14389842647488368}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.954194647811915, "res": {"Yes": 0.954194647811915, "yes": 0.035528851296334624}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8850176328928318, "res": {"Yes": 0.8850176328928318, "yes": 0.10579608119380746}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8533502893858576, "res": {"Yes": 0.8533502893858576, "yes": 0.14026891182470655}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.844539348480771, "res": {"Yes": 0.844539348480771, "yes": 0.14588372188999518}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9227700857843184, "res": {"Yes": 0.9227700857843184, "yes": 0.07014806896780017}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7043694313657527, "res": {"Yes": 0.7043694313657527, "yes": 0.28746220162405384}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6059475183778436, "res": {"Yes": 0.6059475183778436, "yes": 0.3781810612611962}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7830221458953891, "res": {"Yes": 0.7830221458953891, "yes": 0.2089151479758444}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8541682760920768, "res": {"Yes": 0.8541682760920768, "yes": 0.13510805679578655}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.95349346944524, "res": {"Yes": 0.95349346944524, "yes": 0.04054521531826554}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9341504335169675, "res": {"Yes": 0.9341504335169675, "yes": 0.05645049046154666}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9331075793614928, "res": {"Yes": 0.9331075793614928, "yes": 0.05848034290086042}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.911228335889535, "res": {"Yes": 0.911228335889535, "yes": 0.07632452223456358}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9451604599629535, "res": {"Yes": 0.9451604599629535, "yes": 0.05010825069096071}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9094442557466748, "res": {"Yes": 0.9094442557466748, "yes": 0.07618583697764102}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9206195883660316, "res": {"Yes": 0.9206195883660316, "yes": 0.07512720766521044}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8944879833975589, "res": {"Yes": 0.8944879833975589, "yes": 0.09678802509967631}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9281379260785998, "res": {"Yes": 0.9281379260785998, "yes": 0.05918902161393005}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9325733416443646, "res": {"Yes": 0.9325733416443646, "yes": 0.06209365929538196}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7929355216878871, "res": {"Yes": 0.7929355216878871, "yes": 0.1996124784049531}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6875723627989752, "res": {"Yes": 0.6875723627989752, "yes": 0.30423842262986434}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9683768980459796, "res": {"Yes": 0.9683768980459796, "yes": 0.021865002057873832}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7537563246981193, "res": {"Yes": 0.7537563246981193, "yes": 0.2384300030475697}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5976307145390078, "res": {"Yes": 0.5976307145390078, "yes": 0.39309321772901307}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8123263505663532, "res": {"Yes": 0.8123263505663532, "yes": 0.172486949576787}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7964920427116632, "res": {"Yes": 0.7964920427116632, "yes": 0.19186490008875837}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7104225820597967, "res": {"Yes": 0.7104225820597967, "yes": 0.27429379568905016}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8686092322954619, "res": {"Yes": 0.8686092322954619, "yes": 0.12306498866675848}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9400694683018146, "res": {"Yes": 0.9400694683018146, "yes": 0.04984177456001747}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8106534322880647, "res": {"Yes": 0.8106534322880647, "yes": 0.17770046980770718}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7964758661225495, "res": {"Yes": 0.7964758661225495, "yes": 0.19516674173021575}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8898367464824994, "res": {"Yes": 0.8898367464824994, "yes": 0.1066841584320634}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8714379374851741, "res": {"Yes": 0.8714379374851741, "yes": 0.12344987200598898}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8808190990987533, "res": {"Yes": 0.8808190990987533, "yes": 0.10933295714707444}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8581350079067613, "res": {"Yes": 0.8581350079067613, "yes": 0.12823986800017878}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8915109670142329, "res": {"Yes": 0.8915109670142329, "yes": 0.09057491324739675}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9115432014417618, "res": {"Yes": 0.9115432014417618, "yes": 0.07218079252396861}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9065209030316236, "res": {"Yes": 0.9065209030316236, "yes": 0.08621812987943853}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8796011613379933, "res": {"Yes": 0.8796011613379933, "yes": 0.10807384883753465}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9021164727846593, "res": {"Yes": 0.9021164727846593, "yes": 0.08852245768563018}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8718205827242148, "res": {"Yes": 0.8718205827242148, "yes": 0.1130933607566505}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8975425771710767, "res": {"Yes": 0.8975425771710767, "yes": 0.09180804950134298}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9119673499883753, "res": {"Yes": 0.9119673499883753, "yes": 0.0745045039290864}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9381035796559322, "res": {"Yes": 0.9381035796559322, "yes": 0.05240325873445483}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9068428385601047, "res": {"Yes": 0.9068428385601047, "yes": 0.08685807623520654}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8568582985255677, "res": {"Yes": 0.8568582985255677, "yes": 0.1341157159247794}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9015994196011108, "res": {"Yes": 0.9015994196011108, "yes": 0.0896264860004073}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8912542755625544, "res": {"Yes": 0.8912542755625544, "yes": 0.10282824570853528}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.901834154513792, "res": {"Yes": 0.901834154513792, "yes": 0.08989416905076716}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8549649457574133, "res": {"Yes": 0.8549649457574133, "yes": 0.13950506083383718}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8490617777550304, "res": {"Yes": 0.8490617777550304, "yes": 0.13718748294932978}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8373145605824892, "res": {"Yes": 0.8373145605824892, "yes": 0.15649727207257724}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9710339998600224, "res": {"Yes": 0.9710339998600224, "yes": 0.021873487324707605}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8323781285300599, "res": {"Yes": 0.8323781285300599, "yes": 0.15325823038441233}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8024916090861857, "res": {"Yes": 0.8024916090861857, "yes": 0.17981234494242032}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9126302171323599, "res": {"Yes": 0.9126302171323599, "yes": 0.07868343724717174}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8730485167015333, "res": {"Yes": 0.8730485167015333, "yes": 0.10922409388895653}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8771222944328921, "res": {"Yes": 0.8771222944328921, "yes": 0.11405697211831246}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9117884213102273, "res": {"Yes": 0.9117884213102273, "yes": 0.07753438217768673}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7079582188170638, "res": {"Yes": 0.7079582188170638, "yes": 0.2809917414325596}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7235251803439902, "res": {"Yes": 0.7235251803439902, "yes": 0.2728573084805988}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8457107676849803, "res": {"Yes": 0.8457107676849803, "yes": 0.14294497685582164}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.831979897592749, "res": {"Yes": 0.831979897592749, "yes": 0.13660314618631922}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6596721828538502, "res": {"Yes": 0.6596721828538502, "yes": 0.32388599253638595}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6513115563983413, "res": {"Yes": 0.6513115563983413, "yes": 0.3357640156991095}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7461315901731845, "res": {"Yes": 0.7461315901731845, "yes": 0.24022096300926882}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45855694340620595, "res": {"yes": 0.5260285185176452, "Yes": 0.45855694340620595}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4055429469246305, "res": {"yes": 0.5749137784213861, "Yes": 0.4055429469246305}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.927735977203164, "res": {"Yes": 0.927735977203164, "yes": 0.05592456219877604}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7603098738884261, "res": {"Yes": 0.7603098738884261, "yes": 0.22865071541909057}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.75969285161594, "res": {"Yes": 0.75969285161594, "yes": 0.2305070802724627}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7989085063283545, "res": {"Yes": 0.7989085063283545, "yes": 0.190147422459656}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9826745421240001, "res": {"Yes": 0.9826745421240001, "yes": 0.011145367926421259}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7137761865488691, "res": {"Yes": 0.7137761865488691, "yes": 0.2787826592060036}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8943928059952491, "res": {"Yes": 0.8943928059952491, "yes": 0.10064232340487884}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8510184417176763, "res": {"Yes": 0.8510184417176763, "yes": 0.14543097551753773}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9030414491615946, "res": {"Yes": 0.9030414491615946, "yes": 0.09445258655193597}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.953860903140707, "res": {"Yes": 0.953860903140707, "yes": 0.043812912461023805}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9147433269988499, "res": {"Yes": 0.9147433269988499, "yes": 0.0815708509157314}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7320011163160954, "res": {"Yes": 0.7320011163160954, "yes": 0.25088091366148885}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8286787367648576, "res": {"Yes": 0.8286787367648576, "yes": 0.15640493463405847}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7724746007799705, "res": {"Yes": 0.7724746007799705, "yes": 0.2129934425280297}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.934145645074118, "res": {"Yes": 0.934145645074118, "yes": 0.058085924953874435}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.772154785270159, "res": {"Yes": 0.772154785270159, "yes": 0.21719537171764824}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8816681155616923, "res": {"Yes": 0.8816681155616923, "yes": 0.10593631153700944}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7093022206615425, "res": {"Yes": 0.7093022206615425, "yes": 0.25461030969059134}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7364286536039818, "res": {"Yes": 0.7364286536039818, "yes": 0.23602387826164914}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8686266828302288, "res": {"Yes": 0.8686266828302288, "yes": 0.11369945176557776}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9220068702399473, "res": {"Yes": 0.9220068702399473, "yes": 0.06780513835232643}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7744826662064515, "res": {"Yes": 0.7744826662064515, "yes": 0.20557866916107717}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8553012524983036, "res": {"Yes": 0.8553012524983036, "yes": 0.12344988435097677}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7650866757039109, "res": {"Yes": 0.7650866757039109, "yes": 0.21235947574043637}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9008143693129238, "res": {"Yes": 0.9008143693129238, "yes": 0.08316688726310557}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8712914000891695, "res": {"Yes": 0.8712914000891695, "yes": 0.11453242595119245}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6038934765071098, "res": {"Yes": 0.6038934765071098, "yes": 0.3655181657254997}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8029459639616034, "res": {"Yes": 0.8029459639616034, "yes": 0.18320106746226394}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8258691021438345, "res": {"Yes": 0.8258691021438345, "yes": 0.1559689234860484}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7282838521353641, "res": {"Yes": 0.7282838521353641, "yes": 0.2511838846272404}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7003019148473796, "res": {"Yes": 0.7003019148473796, "yes": 0.29181346480695947}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9566346369472597, "res": {"Yes": 0.9566346369472597, "yes": 0.037142166181526964}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.932117597202803, "res": {"Yes": 0.932117597202803, "yes": 0.05742431839266575}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9337586115818692, "res": {"Yes": 0.9337586115818692, "yes": 0.05716044161532003}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981388104842953, "res": {"Yes": 0.981388104842953, "yes": 0.015387906042575904}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9323216833867394, "res": {"Yes": 0.9323216833867394, "yes": 0.058064576491949085}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8290091885599543, "res": {"Yes": 0.8290091885599543, "yes": 0.16255104556271044}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8541251502245386, "res": {"Yes": 0.8541251502245386, "yes": 0.13752932663489822}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.894175535185652, "res": {"Yes": 0.894175535185652, "yes": 0.09799738346546388}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8968755381557907, "res": {"Yes": 0.8968755381557907, "yes": 0.0921693530588804}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.925437176386778, "res": {"Yes": 0.925437176386778, "yes": 0.06597971715633494}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6689706107088222, "res": {"Yes": 0.6689706107088222, "yes": 0.26770295233390556}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.36091698700038527, "res": {"yes": 0.5894649016281838, "Yes": 0.36091698700038527}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6061244445210113, "res": {"Yes": 0.6061244445210113, "yes": 0.38462230584205687}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6142343783351855, "res": {"Yes": 0.6142343783351855, "yes": 0.32078582191243754}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.2909028838459119, "res": {"yes": 0.5515709662296765, "Yes": 0.2909028838459119}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8465129916813677, "res": {"Yes": 0.8465129916813677, "yes": 0.14850071588152386}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9220133556590818, "res": {"Yes": 0.9220133556590818, "yes": 0.07284435043501132}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8882407925201709, "res": {"Yes": 0.8882407925201709, "yes": 0.10653294488774064}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8834073762615279, "res": {"Yes": 0.8834073762615279, "yes": 0.10946594310227008}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.828492719269699, "res": {"Yes": 0.828492719269699, "yes": 0.1644214550014628}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7674103071485278, "res": {"Yes": 0.7674103071485278, "yes": 0.22519258714978485}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6967275041312087, "res": {"Yes": 0.6967275041312087, "yes": 0.29382950347948017}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8353953526151113, "res": {"Yes": 0.8353953526151113, "yes": 0.16087677173672413}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5574371374180795, "res": {"Yes": 0.5574371374180795, "yes": 0.4351762315223461}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7852599653281277, "res": {"Yes": 0.7852599653281277, "yes": 0.20132996499755032}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9301528234914836, "res": {"Yes": 0.9301528234914836, "yes": 0.06009078962602338}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8894985784700578, "res": {"Yes": 0.8894985784700578, "yes": 0.10391098082333695}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832127479886861, "res": {"Yes": 0.9832127479886861, "yes": 0.012147085676299997}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9529489637641486, "res": {"Yes": 0.9529489637641486, "yes": 0.040751872069282505}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.940620119151769, "res": {"Yes": 0.940620119151769, "yes": 0.05096693448268987}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7704175179381947, "res": {"Yes": 0.7704175179381947, "yes": 0.2217177948793034}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.768713323705881, "res": {"Yes": 0.768713323705881, "yes": 0.22562485332010518}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.869501578307224, "res": {"Yes": 0.869501578307224, "yes": 0.11593000212149897}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7206953998238715, "res": {"Yes": 0.7206953998238715, "yes": 0.2736546962552753}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.48522683322977744, "res": {"yes": 0.5061788062906417, "Yes": 0.48522683322977744}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9147996860713496, "res": {"Yes": 0.9147996860713496, "yes": 0.07748710837560792}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8621567933687236, "res": {"Yes": 0.8621567933687236, "yes": 0.12569821047035054}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8445956389042047, "res": {"Yes": 0.8445956389042047, "yes": 0.14770064832242027}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8858445457813726, "res": {"Yes": 0.8858445457813726, "yes": 0.10399063832481556}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9002595755178752, "res": {"Yes": 0.9002595755178752, "yes": 0.08877790300354536}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6425853182359464, "res": {"Yes": 0.6425853182359464, "yes": 0.3543336130116843}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9615033594419693, "res": {"Yes": 0.9615033594419693, "yes": 0.03520992767222663}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9353399377339435, "res": {"Yes": 0.9353399377339435, "yes": 0.059196373346947675}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9535304208936466, "res": {"Yes": 0.9535304208936466, "yes": 0.039863494448078636}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6742123488831154, "res": {"Yes": 0.6742123488831154, "yes": 0.31964837994419454}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9230911687316499, "res": {"Yes": 0.9230911687316499, "yes": 0.06762214235065257}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8026752642531974, "res": {"Yes": 0.8026752642531974, "yes": 0.18842698398486674}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7981410043188607, "res": {"Yes": 0.7981410043188607, "yes": 0.18745066278692224}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7126279029767703, "res": {"Yes": 0.7126279029767703, "yes": 0.27822776560436896}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8927180646785978, "res": {"Yes": 0.8927180646785978, "yes": 0.09565259053320012}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9485820958549349, "res": {"Yes": 0.9485820958549349, "yes": 0.04636488940708592}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.908006615149903, "res": {"Yes": 0.908006615149903, "yes": 0.08443311470554135}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8610876323506399, "res": {"Yes": 0.8610876323506399, "yes": 0.13281959501511365}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8716162344958042, "res": {"Yes": 0.8716162344958042, "yes": 0.12336848365615927}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8989875170316208, "res": {"Yes": 0.8989875170316208, "yes": 0.09473515715581451}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5645246101354616, "res": {"Yes": 0.5645246101354616, "yes": 0.42858022311332067}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9523900459190627, "res": {"Yes": 0.9523900459190627, "yes": 0.04326436335689654}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9429831547272643, "res": {"Yes": 0.9429831547272643, "yes": 0.049773468398626224}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9389646189342553, "res": {"Yes": 0.9389646189342553, "yes": 0.05214214145863445}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6769138274940315, "res": {"Yes": 0.6769138274940315, "yes": 0.3178033448243138}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8015433037561963, "res": {"Yes": 0.8015433037561963, "yes": 0.18404330422766518}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8599582305861259, "res": {"Yes": 0.8599582305861259, "yes": 0.12098941700246915}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6753022368683301, "res": {"Yes": 0.6753022368683301, "yes": 0.29920608366008794}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7695213044435277, "res": {"Yes": 0.7695213044435277, "yes": 0.2156101204901626}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.876608933473866, "res": {"Yes": 0.876608933473866, "yes": 0.09465651226960595}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9714452188195154, "res": {"Yes": 0.9714452188195154, "yes": 0.016581222476444904}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772073393844662, "res": {"Yes": 0.9772073393844662, "yes": 0.015402280440091044}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8395233237029761, "res": {"Yes": 0.8395233237029761, "yes": 0.14651987414985365}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8521622636438209, "res": {"Yes": 0.8521622636438209, "yes": 0.14074784822514724}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8226015244320626, "res": {"Yes": 0.8226015244320626, "yes": 0.16612800056833035}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7251646981968162, "res": {"Yes": 0.7251646981968162, "yes": 0.2693040608496395}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9789369959875561, "res": {"Yes": 0.9789369959875561, "yes": 0.01895333260225387}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8258000788887994, "res": {"Yes": 0.8258000788887994, "yes": 0.17003880555677794}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8261594874661825, "res": {"Yes": 0.8261594874661825, "yes": 0.16961369670692275}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872528263185278, "res": {"Yes": 0.9872528263185278, "yes": 0.008059277231465151}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9126262380732876, "res": {"Yes": 0.9126262380732876, "yes": 0.08164723697528763}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9840635959380203, "res": {"Yes": 0.9840635959380203, "yes": 0.00933417670786909}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8231177124823895, "res": {"Yes": 0.8231177124823895, "yes": 0.16896166907356197}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9304390104221602, "res": {"Yes": 0.9304390104221602, "yes": 0.0633615099803097}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9206164582647524, "res": {"Yes": 0.9206164582647524, "yes": 0.07131732455079065}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7276534313310221, "res": {"Yes": 0.7276534313310221, "yes": 0.26104222581849307}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7405460644383176, "res": {"Yes": 0.7405460644383176, "yes": 0.24519634039771376}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7730114639801222, "res": {"Yes": 0.7730114639801222, "yes": 0.21685173669371452}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6750337900697113, "res": {"Yes": 0.6750337900697113, "yes": 0.313094380108882}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.510284610049576, "res": {"Yes": 0.510284610049576, "yes": 0.47186501003014314}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5849035015337245, "res": {"Yes": 0.5849035015337245, "yes": 0.4111755856595038}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9533790408863412, "res": {"Yes": 0.9533790408863412, "yes": 0.04133181651236763}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9576794532022472, "res": {"Yes": 0.9576794532022472, "yes": 0.03549185661293069}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9766829377164218, "res": {"Yes": 0.9766829377164218, "yes": 0.017970747175547447}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821908443445552, "res": {"Yes": 0.9821908443445552, "yes": 0.015379595740546379}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7900373074625331, "res": {"Yes": 0.7900373074625331, "yes": 0.20312719902246187}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6944457386039207, "res": {"Yes": 0.6944457386039207, "yes": 0.29209041039420003}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6071109988366858, "res": {"Yes": 0.6071109988366858, "yes": 0.3794116331443199}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6279231453618722, "res": {"Yes": 0.6279231453618722, "yes": 0.3633048826957956}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6368651762821691, "res": {"Yes": 0.6368651762821691, "yes": 0.3511988822098557}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8085236004311289, "res": {"Yes": 0.8085236004311289, "yes": 0.18866545721288935}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7693539055988395, "res": {"Yes": 0.7693539055988395, "yes": 0.22404277175948442}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7932222276287424, "res": {"Yes": 0.7932222276287424, "yes": 0.20315289623845287}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9795108202020552, "res": {"Yes": 0.9795108202020552, "yes": 0.017932671614194953}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7248620486186157, "res": {"Yes": 0.7248620486186157, "yes": 0.26586933837492005}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7663446543727429, "res": {"Yes": 0.7663446543727429, "yes": 0.2214208698614304}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7417423376220389, "res": {"Yes": 0.7417423376220389, "yes": 0.24504566452224613}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.648682199113881, "res": {"Yes": 0.648682199113881, "yes": 0.33722487037963556}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8351119260004046, "res": {"Yes": 0.8351119260004046, "yes": 0.15537242532570963}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9136850601929394, "res": {"Yes": 0.9136850601929394, "yes": 0.0832626508087435}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7851306829183029, "res": {"Yes": 0.7851306829183029, "yes": 0.2084995283200076}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8352622680293323, "res": {"Yes": 0.8352622680293323, "yes": 0.1618207884864484}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7960921129868909, "res": {"Yes": 0.7960921129868909, "yes": 0.19940546791297592}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6944045939128319, "res": {"Yes": 0.6944045939128319, "yes": 0.28829174733970164}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.785905769399532, "res": {"Yes": 0.785905769399532, "yes": 0.21119753470847497}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8477681064178414, "res": {"Yes": 0.8477681064178414, "yes": 0.14571300660480221}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.842076882100717, "res": {"Yes": 0.842076882100717, "yes": 0.14829149224906044}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5949367390579408, "res": {"Yes": 0.5949367390579408, "yes": 0.39923578371399987}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8679721934767138, "res": {"Yes": 0.8679721934767138, "yes": 0.1215998917091768}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7745039647726261, "res": {"Yes": 0.7745039647726261, "yes": 0.2182693780254935}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8340540004320615, "res": {"Yes": 0.8340540004320615, "yes": 0.15605549465864127}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8044374304745135, "res": {"Yes": 0.8044374304745135, "yes": 0.18118037445252047}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7483778511170747, "res": {"Yes": 0.7483778511170747, "yes": 0.23130617804646583}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9078326849830717, "res": {"Yes": 0.9078326849830717, "yes": 0.08036997643210451}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8578465342524264, "res": {"Yes": 0.8578465342524264, "yes": 0.13574423348529968}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8956329804122862, "res": {"Yes": 0.8956329804122862, "yes": 0.09464421718724501}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9124897376497179, "res": {"Yes": 0.9124897376497179, "yes": 0.07680962662531197}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9152195473767128, "res": {"Yes": 0.9152195473767128, "yes": 0.07688366067607891}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9292288131601206, "res": {"Yes": 0.9292288131601206, "yes": 0.060276504176263956}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9419569237071773, "res": {"Yes": 0.9419569237071773, "yes": 0.051372760415581205}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9236602590167324, "res": {"Yes": 0.9236602590167324, "yes": 0.06804159973816856}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9100742448974037, "res": {"Yes": 0.9100742448974037, "yes": 0.08209351127962966}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9084264881953459, "res": {"Yes": 0.9084264881953459, "yes": 0.08439286659236243}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9138714983746149, "res": {"Yes": 0.9138714983746149, "yes": 0.08007726483224326}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8544062378059614, "res": {"Yes": 0.8544062378059614, "yes": 0.13808604135957916}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7120714937791531, "res": {"Yes": 0.7120714937791531, "yes": 0.28499418569265356}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.513562547910623, "res": {"Yes": 0.513562547910623, "yes": 0.46387173414658445}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.679739256955326, "res": {"Yes": 0.679739256955326, "yes": 0.3119716065379157}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40261137996242813, "res": {"yes": 0.5831329115324402, "Yes": 0.40261137996242813}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9685358895317908, "res": {"Yes": 0.9685358895317908, "yes": 0.022346423345784416}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.794235017276291, "res": {"Yes": 0.794235017276291, "yes": 0.18589577719091824}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9034348319465413, "res": {"Yes": 0.9034348319465413, "yes": 0.08580997675948938}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8418412093457673, "res": {"Yes": 0.8418412093457673, "yes": 0.1443628149330462}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.875429032444281, "res": {"Yes": 0.875429032444281, "yes": 0.11283925445146983}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9267463179844274, "res": {"Yes": 0.9267463179844274, "yes": 0.06700817179091247}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8534070646692193, "res": {"Yes": 0.8534070646692193, "yes": 0.13742536627046473}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8930739953538498, "res": {"Yes": 0.8930739953538498, "yes": 0.10110482793909567}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.882822683483908, "res": {"Yes": 0.882822683483908, "yes": 0.10922785126241277}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9800120455651968, "res": {"Yes": 0.9800120455651968, "yes": 0.010894480436218558}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7847554251995961, "res": {"Yes": 0.7847554251995961, "yes": 0.2063232634736445}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8936914961228235, "res": {"Yes": 0.8936914961228235, "yes": 0.10054366180564074}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9442193608967031, "res": {"Yes": 0.9442193608967031, "yes": 0.043826461508131485}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9013189305810442, "res": {"Yes": 0.9013189305810442, "yes": 0.09210476498315152}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9181351839485548, "res": {"Yes": 0.9181351839485548, "yes": 0.07943604869271159}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.912134036372597, "res": {"Yes": 0.912134036372597, "yes": 0.08465902166532605}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7085321240129617, "res": {"Yes": 0.7085321240129617, "yes": 0.2835746428480134}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7797270472224479, "res": {"Yes": 0.7797270472224479, "yes": 0.21536971996009222}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7784988226816336, "res": {"Yes": 0.7784988226816336, "yes": 0.21815315870670896}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7094433716600569, "res": {"Yes": 0.7094433716600569, "yes": 0.28122103990271324}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7631387776725352, "res": {"Yes": 0.7631387776725352, "yes": 0.22222206595033805}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7328250809651636, "res": {"Yes": 0.7328250809651636, "yes": 0.22571211932267818}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7376855494040164, "res": {"Yes": 0.7376855494040164, "yes": 0.23357199958562194}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7556796016315516, "res": {"Yes": 0.7556796016315516, "yes": 0.22253666598690444}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7750583981624246, "res": {"Yes": 0.7750583981624246, "yes": 0.2079383577862599}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824169834744894, "res": {"Yes": 0.9824169834744894, "yes": 0.010012429578206595}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8482583852622672, "res": {"Yes": 0.8482583852622672, "yes": 0.14039518259226524}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9021194768375155, "res": {"Yes": 0.9021194768375155, "yes": 0.08829568853742951}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8730734164002976, "res": {"Yes": 0.8730734164002976, "yes": 0.11912230983142685}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9211045051000153, "res": {"Yes": 0.9211045051000153, "yes": 0.0718462399333325}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9132107158498282, "res": {"Yes": 0.9132107158498282, "yes": 0.0802056995441095}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5738330377356018, "res": {"Yes": 0.5738330377356018, "yes": 0.4222371736308}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5294392242697579, "res": {"Yes": 0.5294392242697579, "yes": 0.4659998345031543}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9578408520695071, "res": {"Yes": 0.9578408520695071, "yes": 0.0355106190166791}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769152039287848, "res": {"Yes": 0.9769152039287848, "yes": 0.017777102965421517}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.707780310035772, "res": {"Yes": 0.707780310035772, "yes": 0.2882562319842202}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8267039241256368, "res": {"Yes": 0.8267039241256368, "yes": 0.1588466194326236}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.747527937375737, "res": {"Yes": 0.747527937375737, "yes": 0.2369207694163328}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8377094893705489, "res": {"Yes": 0.8377094893705489, "yes": 0.1501734078900194}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9013872982147384, "res": {"Yes": 0.9013872982147384, "yes": 0.09173933853983945}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7618429857302113, "res": {"Yes": 0.7618429857302113, "yes": 0.22153160797858334}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6839296483081589, "res": {"Yes": 0.6839296483081589, "yes": 0.3096064822455885}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7446346377685311, "res": {"Yes": 0.7446346377685311, "yes": 0.24985513226951328}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7543410441806813, "res": {"Yes": 0.7543410441806813, "yes": 0.23345764737884792}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8275248681809996, "res": {"Yes": 0.8275248681809996, "yes": 0.1678354405765739}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7389273436389948, "res": {"Yes": 0.7389273436389948, "yes": 0.25507218383979846}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8176573153868005, "res": {"Yes": 0.8176573153868005, "yes": 0.17121385325545604}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7103061746493762, "res": {"Yes": 0.7103061746493762, "yes": 0.26841505065259974}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.695209221910684, "res": {"Yes": 0.695209221910684, "yes": 0.29266680539512757}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.893969026841463, "res": {"Yes": 0.893969026841463, "yes": 0.09279477914369653}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.798120747757228, "res": {"Yes": 0.798120747757228, "yes": 0.17888568650945902}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9698440147034774, "res": {"Yes": 0.9698440147034774, "yes": 0.02431955548028554}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816903855557509, "res": {"Yes": 0.9816903855557509, "yes": 0.013360247307560722}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6942803343857794, "res": {"Yes": 0.6942803343857794, "yes": 0.2958358024575873}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8335308874078294, "res": {"Yes": 0.8335308874078294, "yes": 0.15444451088547323}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.4835656657356658, "res": {"yes": 0.5076053821833647, "Yes": 0.4835656657356658}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.922008386020488, "res": {"Yes": 0.922008386020488, "yes": 0.07042658917352737}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9113159310120234, "res": {"Yes": 0.9113159310120234, "yes": 0.08628621734175099}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8070232344392023, "res": {"Yes": 0.8070232344392023, "yes": 0.18707362930943638}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9778124238469249, "res": {"Yes": 0.9778124238469249, "yes": 0.014037274825111494}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7574101973344847, "res": {"Yes": 0.7574101973344847, "yes": 0.24041160590773172}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8167415372895003, "res": {"Yes": 0.8167415372895003, "yes": 0.17472230394747068}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8958190137439507, "res": {"Yes": 0.8958190137439507, "yes": 0.09411019463016893}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7588378202335555, "res": {"Yes": 0.7588378202335555, "yes": 0.22798113378691484}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9520385417297748, "res": {"Yes": 0.9520385417297748, "yes": 0.04276884337555131}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9493937272066771, "res": {"Yes": 0.9493937272066771, "yes": 0.04797911341314824}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7980152032676203, "res": {"Yes": 0.7980152032676203, "yes": 0.1852951907289204}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8979990666910689, "res": {"Yes": 0.8979990666910689, "yes": 0.09128815837110213}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7945098145233829, "res": {"Yes": 0.7945098145233829, "yes": 0.18513001813923302}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.784215408994353, "res": {"Yes": 0.784215408994353, "yes": 0.20212068691779697}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8415797319901571, "res": {"Yes": 0.8415797319901571, "yes": 0.141011955700006}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8340148591962775, "res": {"Yes": 0.8340148591962775, "yes": 0.1498694046164152}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819922781987328, "res": {"Yes": 0.9819922781987328, "yes": 0.012700054426190033}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813883315436315, "res": {"Yes": 0.9813883315436315, "yes": 0.011663854244252196}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7373462920864559, "res": {"Yes": 0.7373462920864559, "yes": 0.2533096058182774}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9724754383537455, "res": {"Yes": 0.9724754383537455, "yes": 0.01857583939309245}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.792344053905678, "res": {"Yes": 0.792344053905678, "yes": 0.19529879717043716}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8662037681528314, "res": {"Yes": 0.8662037681528314, "yes": 0.1246392143790688}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.868784040118976, "res": {"Yes": 0.868784040118976, "yes": 0.1235328705535017}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8585639680544519, "res": {"Yes": 0.8585639680544519, "yes": 0.13155884986294558}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.776339852826306, "res": {"Yes": 0.776339852826306, "yes": 0.2157230866545309}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9323236505875667, "res": {"Yes": 0.9323236505875667, "yes": 0.05574758432912394}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5820708227967228, "res": {"Yes": 0.5820708227967228, "yes": 0.41333614775072963}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6068588451640147, "res": {"Yes": 0.6068588451640147, "yes": 0.38971645606806316}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5327026981638603, "res": {"Yes": 0.5327026981638603, "yes": 0.46252141055928175}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8206278918261111, "res": {"Yes": 0.8206278918261111, "yes": 0.17232361282559414}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5957878986521181, "res": {"Yes": 0.5957878986521181, "yes": 0.37526343736841367}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7775856190709839, "res": {"Yes": 0.7775856190709839, "yes": 0.1962047151057977}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9068930337003567, "res": {"Yes": 0.9068930337003567, "yes": 0.07242703048700047}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8950867213841974, "res": {"Yes": 0.8950867213841974, "yes": 0.08851950108491899}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8609517978806649, "res": {"Yes": 0.8609517978806649, "yes": 0.10427540030197495}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8226317802725327, "res": {"Yes": 0.8226317802725327, "yes": 0.17028550660745365}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8246318126173605, "res": {"Yes": 0.8246318126173605, "yes": 0.162736654704339}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6554491389610166, "res": {"Yes": 0.6554491389610166, "yes": 0.33238125848704625}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9043882868100861, "res": {"Yes": 0.9043882868100861, "yes": 0.09243773465391936}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925751219132154, "res": {"Yes": 0.9925751219132154, "yes": 0.0038259043816654087}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7766200300805408, "res": {"Yes": 0.7766200300805408, "yes": 0.21932665173570806}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6033843674704829, "res": {"Yes": 0.6033843674704829, "yes": 0.39152776755500096}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7660738836819794, "res": {"Yes": 0.7660738836819794, "yes": 0.22724237496427993}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8729942846121419, "res": {"Yes": 0.8729942846121419, "yes": 0.12212281014975215}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9411929458537835, "res": {"Yes": 0.9411929458537835, "yes": 0.050679068689016935}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7377285503479415, "res": {"Yes": 0.7377285503479415, "yes": 0.24395453573628065}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.892897380645053, "res": {"Yes": 0.892897380645053, "yes": 0.09278833940948532}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9794910352628166, "res": {"Yes": 0.9794910352628166, " Yes": 0.012307148580747267}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8294835921144634, "res": {"Yes": 0.8294835921144634, "yes": 0.14126826607093446}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8659592993199268, "res": {"Yes": 0.8659592993199268, "yes": 0.1192369606316777}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7256494732260137, "res": {"Yes": 0.7256494732260137, "yes": 0.2628423910538628}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8315655583346091, "res": {"Yes": 0.8315655583346091, "yes": 0.16157418369820778}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7125449721672592, "res": {"Yes": 0.7125449721672592, "yes": 0.2842450388437068}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7915216396375458, "res": {"Yes": 0.7915216396375458, "yes": 0.20382622222665384}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8459302834847533, "res": {"Yes": 0.8459302834847533, "yes": 0.1502254069243444}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7317578249447089, "res": {"Yes": 0.7317578249447089, "yes": 0.18136456539336168}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7519588780286746, "res": {"Yes": 0.7519588780286746, "yes": 0.1383114849519204}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6458004654010797, "res": {"Yes": 0.6458004654010797, "yes": 0.280744128277229}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9007114661573137, "res": {"Yes": 0.9007114661573137, "yes": 0.0939529926485365}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8433484594355041, "res": {"Yes": 0.8433484594355041, "yes": 0.09185211875787103}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9210854844883735, "res": {"Yes": 0.9210854844883735, " Yes": 0.06034645945617961}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8979886894738138, "res": {"Yes": 0.8979886894738138, "yes": 0.09886230060804335}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672956634042048, "res": {"Yes": 0.9672956634042048, " Yes": 0.016938807292494303}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9231639065659413, "res": {"Yes": 0.9231639065659413, "yes": 0.0700071569511406}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9816504119371195, "res": {"Yes": 0.9816504119371195, " Yes": 0.0120996881432101}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8945622021444635, "res": {"Yes": 0.8945622021444635, "yes": 0.09807211535131899}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9299669045252875, "res": {"Yes": 0.9299669045252875, "yes": 0.0649208138640181}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8166708595351339, "res": {"Yes": 0.8166708595351339, "yes": 0.17190365576626673}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.880212608607756, "res": {"Yes": 0.880212608607756, "yes": 0.10606849651770252}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8498229205210485, "res": {"Yes": 0.8498229205210485, "yes": 0.14514651477428683}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7839347885195034, "res": {"Yes": 0.7839347885195034, "yes": 0.1999707646237964}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.832998697868158, "res": {"Yes": 0.832998697868158, "yes": 0.14695142338181763}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8633385936019446, "res": {"Yes": 0.8633385936019446, "yes": 0.12217241429439385}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8428906118141297, "res": {"Yes": 0.8428906118141297, "yes": 0.1256206032231488}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8786171306181874, "res": {"Yes": 0.8786171306181874, "yes": 0.10380812871465478}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5687740351531244, "res": {"Yes": 0.5687740351531244, "yes": 0.42218629711674904}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7566060049940054, "res": {"Yes": 0.7566060049940054, "yes": 0.232800131095503}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7077096770854733, "res": {"Yes": 0.7077096770854733, "yes": 0.28306480913866705}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8409570167551954, "res": {"Yes": 0.8409570167551954, "yes": 0.15342156802960336}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.637560228712759, "res": {"Yes": 0.637560228712759, "yes": 0.35131001911711845}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9509478518581717, "res": {"Yes": 0.9509478518581717, "yes": 0.04669466783258463}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8165097299386991, "res": {"Yes": 0.8165097299386991, "yes": 0.17946278831182108}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8630545501405676, "res": {"Yes": 0.8630545501405676, "yes": 0.1320047859310009}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9090412829948568, "res": {"Yes": 0.9090412829948568, "yes": 0.08787220815465902}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9078016330971498, "res": {"Yes": 0.9078016330971498, "yes": 0.0885145618345648}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9006859403560651, "res": {"Yes": 0.9006859403560651, "yes": 0.09038493262300767}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8449543134582036, "res": {"Yes": 0.8449543134582036, "yes": 0.14864545577028818}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8829105816857756, "res": {"Yes": 0.8829105816857756, "yes": 0.11154561149778267}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8888270240697166, "res": {"Yes": 0.8888270240697166, "yes": 0.10204576396448733}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8695813500410914, "res": {"Yes": 0.8695813500410914, "yes": 0.12461553517793214}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.3750638878209842, "res": {"yes": 0.528876443799996, "Yes": 0.3750638878209842}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3863069802374788, "res": {"yes": 0.5599968333506009, "Yes": 0.3863069802374788}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39605796586221426, "res": {"yes": 0.45523040663210323, "Yes": 0.39605796586221426}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4598396671180252, "res": {"Yes": 0.4598396671180252, "yes": 0.4044652316789331}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7090960990191775, "res": {"Yes": 0.7090960990191775, "yes": 0.24817518278253106}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8463010090492343, "res": {"Yes": 0.8463010090492343, "yes": 0.14976800732008638}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9762586143003261, "res": {"Yes": 0.9762586143003261, "yes": 0.01966898203659337}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9661564068721504, "res": {"Yes": 0.9661564068721504, "yes": 0.027349637618022685}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8026960136769635, "res": {"Yes": 0.8026960136769635, "yes": 0.1905424832675074}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9443838147950171, "res": {"Yes": 0.9443838147950171, "yes": 0.047388769913859846}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8925052169707934, "res": {"Yes": 0.8925052169707934, "yes": 0.09842819317064004}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9167361095373799, "res": {"Yes": 0.9167361095373799, "yes": 0.06143131853577141}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8939544463255397, "res": {"Yes": 0.8939544463255397, "yes": 0.09500087280526102}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9485697605735646, "res": {"Yes": 0.9485697605735646, "yes": 0.04570044242758797}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8541955928303385, "res": {"Yes": 0.8541955928303385, "yes": 0.13430060105467553}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6438847039753113, "res": {"Yes": 0.6438847039753113, "yes": 0.34754358409007824}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.661492376538973, "res": {"Yes": 0.661492376538973, "yes": 0.3275976828944502}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6262825824952135, "res": {"Yes": 0.6262825824952135, "yes": 0.36302223251317994}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7971907769623485, "res": {"Yes": 0.7971907769623485, "yes": 0.18937469751505814}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7335807858296869, "res": {"Yes": 0.7335807858296869, "yes": 0.2591810310135771}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.923474077111953, "res": {"Yes": 0.923474077111953, "yes": 0.06843582733754039}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8363019055908827, "res": {"Yes": 0.8363019055908827, "yes": 0.15583455178669803}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8956076343575889, "res": {"Yes": 0.8956076343575889, "yes": 0.09631214999155334}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8740186455263347, "res": {"Yes": 0.8740186455263347, "yes": 0.11875621507063693}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8355413089321263, "res": {"Yes": 0.8355413089321263, "yes": 0.15855372248024616}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9073633978122521, "res": {"Yes": 0.9073633978122521, "yes": 0.0837743000928541}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8889456690635348, "res": {"Yes": 0.8889456690635348, "yes": 0.10639666995284874}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8532647879705536, "res": {"Yes": 0.8532647879705536, "yes": 0.13945806949497777}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8783437211574453, "res": {"Yes": 0.8783437211574453, "yes": 0.11111455309066913}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8651161617725188, "res": {"Yes": 0.8651161617725188, "yes": 0.1269700051931286}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9149156168327865, "res": {"Yes": 0.9149156168327865, "yes": 0.07929515098032729}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.859353711935487, "res": {"Yes": 0.859353711935487, "yes": 0.13082321120587906}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9293187891665742, "res": {"Yes": 0.9293187891665742, "yes": 0.06517068028957919}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9509888148191239, "res": {"Yes": 0.9509888148191239, "yes": 0.04683427370348766}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8379047905920656, "res": {"Yes": 0.8379047905920656, "yes": 0.15835845706552926}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9168190586563548, "res": {"Yes": 0.9168190586563548, "yes": 0.0778079008198168}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9333683820458829, "res": {"Yes": 0.9333683820458829, "yes": 0.06077578757495446}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9265715871728469, "res": {"Yes": 0.9265715871728469, "yes": 0.06606791836136178}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9404390606225678, "res": {"Yes": 0.9404390606225678, "yes": 0.056903774748859555}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9607045802120012, "res": {"Yes": 0.9607045802120012, "yes": 0.03556547909211003}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9699120855943991, "res": {"Yes": 0.9699120855943991, "yes": 0.0218847769566417}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.980748325755724, "res": {"Yes": 0.980748325755724, "yes": 0.014624837456667438}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9596591650181888, "res": {"Yes": 0.9596591650181888, "yes": 0.034616989203011185}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7721581441507804, "res": {"Yes": 0.7721581441507804, "yes": 0.2222153327237487}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9262962602770816, "res": {"Yes": 0.9262962602770816, "yes": 0.061591956085313135}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8779786783335796, "res": {"Yes": 0.8779786783335796, "yes": 0.11284919602770703}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8271673801997602, "res": {"Yes": 0.8271673801997602, "yes": 0.15763311548428288}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8929780754274808, "res": {"Yes": 0.8929780754274808, "yes": 0.09518778064426686}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8576300754149674, "res": {"Yes": 0.8576300754149674, "yes": 0.12278454822166592}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7122358303574827, "res": {"Yes": 0.7122358303574827, "yes": 0.2866897667394431}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9153268082402974, "res": {"Yes": 0.9153268082402974, "yes": 0.0766943605030416}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8729130562731657, "res": {"Yes": 0.8729130562731657, "yes": 0.11957921980634828}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9191485468739492, "res": {"Yes": 0.9191485468739492, "yes": 0.07339124708467837}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8904412434112919, "res": {"Yes": 0.8904412434112919, "yes": 0.10268204571188227}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9242663974175712, "res": {"Yes": 0.9242663974175712, "yes": 0.07047899900463334}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8139248004391769, "res": {"Yes": 0.8139248004391769, "yes": 0.17815862034018573}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6562905739605553, "res": {"Yes": 0.6562905739605553, "yes": 0.3243381936553194}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.605000194175565, "res": {"Yes": 0.605000194175565, "yes": 0.3880784574025978}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5907088633244427, "res": {"Yes": 0.5907088633244427, "yes": 0.40477626363234026}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5740918948035018, "res": {"Yes": 0.5740918948035018, "yes": 0.41268503668694995}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8198890440610663, "res": {"Yes": 0.8198890440610663, "yes": 0.16702152210312876}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.968299436802656, "res": {"Yes": 0.968299436802656, "yes": 0.028196119976223292}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9610766082187163, "res": {"Yes": 0.9610766082187163, "yes": 0.036297326157403234}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9438683493466672, "res": {"Yes": 0.9438683493466672, "yes": 0.05022706771948215}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9310003367474811, "res": {"Yes": 0.9310003367474811, "yes": 0.05940793355500586}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9525353907771379, "res": {"Yes": 0.9525353907771379, "yes": 0.03846640561645213}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.922939835725321, "res": {"Yes": 0.922939835725321, "yes": 0.06945742528125544}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9307676157546304, "res": {"Yes": 0.9307676157546304, "yes": 0.06139558205005436}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9363735689523746, "res": {"Yes": 0.9363735689523746, "yes": 0.05834351885653111}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8964963655398424, "res": {"Yes": 0.8964963655398424, "yes": 0.09398350413852183}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8335757159043817, "res": {"Yes": 0.8335757159043817, "yes": 0.16172815642605087}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7349373951968194, "res": {"Yes": 0.7349373951968194, "yes": 0.24768574341427682}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8381588818428601, "res": {"Yes": 0.8381588818428601, "yes": 0.1525552276454279}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8817475486202316, "res": {"Yes": 0.8817475486202316, "yes": 0.11009191182851205}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8644391619667237, "res": {"Yes": 0.8644391619667237, "yes": 0.12810848947847348}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8344407825432486, "res": {"Yes": 0.8344407825432486, "yes": 0.1552776149363654}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.923914485299946, "res": {"Yes": 0.923914485299946, "yes": 0.06918602414350382}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8041162586064512, "res": {"Yes": 0.8041162586064512, "yes": 0.18838344354015998}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8978563321042732, "res": {"Yes": 0.8978563321042732, "yes": 0.09155025706009194}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654228219299159, "res": {"Yes": 0.8654228219299159, "yes": 0.1268257223375711}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8979202527717848, "res": {"Yes": 0.8979202527717848, "yes": 0.08964967537175145}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9893766803033519, "res": {"Yes": 0.9893766803033519, "yes": 0.006011804054626784}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9139070431839081, "res": {"Yes": 0.9139070431839081, "yes": 0.07341316498323727}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9411772064157558, "res": {"Yes": 0.9411772064157558, "yes": 0.05100144057949376}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9630361653582791, "res": {"Yes": 0.9630361653582791, " Yes": 0.0163299309569021}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8653601675857229, "res": {"Yes": 0.8653601675857229, "yes": 0.12452354057839012}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8325659758478392, "res": {"Yes": 0.8325659758478392, "yes": 0.15331168084789407}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7512303649350563, "res": {"Yes": 0.7512303649350563, "yes": 0.23524357788420205}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.839450817204746, "res": {"Yes": 0.839450817204746, "yes": 0.14888166249985763}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.812879660057322, "res": {"Yes": 0.812879660057322, "yes": 0.16984145410618395}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8957339776812766, "res": {"Yes": 0.8957339776812766, "yes": 0.10137183375810413}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.881973375446115, "res": {"Yes": 0.881973375446115, "yes": 0.11374893316093788}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.800405479775314, "res": {"Yes": 0.800405479775314, "yes": 0.19537900332175134}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8621495254175899, "res": {"Yes": 0.8621495254175899, "yes": 0.13468689373037523}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8570218112569454, "res": {"Yes": 0.8570218112569454, "yes": 0.139700043686882}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7866496824260802, "res": {"Yes": 0.7866496824260802, "yes": 0.1909323313185569}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5460124780101095, "res": {"Yes": 0.5460124780101095, "yes": 0.44691323873388145}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6236969098548576, "res": {"Yes": 0.6236969098548576, "yes": 0.3652955233420132}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7187417824852032, "res": {"Yes": 0.7187417824852032, "yes": 0.25682748091061475}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6261742825503629, "res": {"Yes": 0.6261742825503629, "yes": 0.36334699217189703}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9311358164630689, "res": {"Yes": 0.9311358164630689, "yes": 0.060631695171713365}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8974404157141469, "res": {"Yes": 0.8974404157141469, "yes": 0.09057940587451006}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9241555198289982, "res": {"Yes": 0.9241555198289982, "yes": 0.07024409017007001}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9802114179327582, "res": {"Yes": 0.9802114179327582, "yes": 0.01469055512790714}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9412185033540279, "res": {"Yes": 0.9412185033540279, "yes": 0.05219059357730194}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8395091106934193, "res": {"Yes": 0.8395091106934193, "yes": 0.13633784762388562}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8812985360530949, "res": {"Yes": 0.8812985360530949, "yes": 0.11279621448629196}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6163227178615901, "res": {"Yes": 0.6163227178615901, "yes": 0.3686845789811935}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8881763885153007, "res": {"Yes": 0.8881763885153007, "yes": 0.10180375393054215}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.627216965955556, "res": {"Yes": 0.627216965955556, "yes": 0.35563263680935725}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8772518023284798, "res": {"Yes": 0.8772518023284798, "yes": 0.11662528221122258}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8161177768975256, "res": {"Yes": 0.8161177768975256, "yes": 0.1802647816138371}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9391444798077646, "res": {"Yes": 0.9391444798077646, "yes": 0.04626135183774504}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8564695958328434, "res": {"Yes": 0.8564695958328434, "yes": 0.13574211589177457}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9811336305385175, "res": {"Yes": 0.9811336305385175, "yes": 0.0121079986417569}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6703652941658539, "res": {"Yes": 0.6703652941658539, "yes": 0.3247154560467225}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8458658852636209, "res": {"Yes": 0.8458658852636209, "yes": 0.1513690173405836}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8596067944890192, "res": {"Yes": 0.8596067944890192, "yes": 0.13627182156329953}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8946716316856042, "res": {"Yes": 0.8946716316856042, "yes": 0.09900481437296492}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6706426280102853, "res": {"Yes": 0.6706426280102853, "yes": 0.32361417198328846}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9793915475342869, "res": {"Yes": 0.9793915475342869, "yes": 0.01575463769860783}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639871522518293, "res": {"Yes": 0.9639871522518293, "yes": 0.030132094886380533}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.951356049174142, "res": {"Yes": 0.951356049174142, "yes": 0.04335829259706577}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.757181387985431, "res": {"Yes": 0.757181387985431, "yes": 0.2381815692879897}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5925862185614198, "res": {"Yes": 0.5925862185614198, "yes": 0.40112834521679136}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9678414020368157, "res": {"Yes": 0.9678414020368157, "yes": 0.028376833174462494}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9591839098128189, "res": {"Yes": 0.9591839098128189, "yes": 0.03141008058030934}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9480072851492974, "res": {"Yes": 0.9480072851492974, "yes": 0.047189588186173755}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9619053576282097, "res": {"Yes": 0.9619053576282097, "yes": 0.03690821957615633}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9516844816148063, "res": {"Yes": 0.9516844816148063, "yes": 0.037472408131014844}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7589047982152641, "res": {"Yes": 0.7589047982152641, "yes": 0.233626638466093}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8692076755912904, "res": {"Yes": 0.8692076755912904, "yes": 0.12429937607342402}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9083256675345672, "res": {"Yes": 0.9083256675345672, "yes": 0.08372641164067428}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8643563267309461, "res": {"Yes": 0.8643563267309461, "yes": 0.12261729462157307}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8689291826155674, "res": {"Yes": 0.8689291826155674, "yes": 0.12056922616648957}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6795232497558903, "res": {"Yes": 0.6795232497558903, "yes": 0.3041793756827235}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8393731212329306, "res": {"Yes": 0.8393731212329306, "yes": 0.1502188572393867}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7777626876982031, "res": {"Yes": 0.7777626876982031, "yes": 0.210688276592998}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8474794820424568, "res": {"Yes": 0.8474794820424568, "yes": 0.14432447726086822}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7982058080176719, "res": {"Yes": 0.7982058080176719, "yes": 0.1853064199577208}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8353087683741186, "res": {"Yes": 0.8353087683741186, "yes": 0.14354467043412003}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7787661739336259, "res": {"Yes": 0.7787661739336259, "yes": 0.20555837954773987}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7461117433368499, "res": {"Yes": 0.7461117433368499, "yes": 0.2307362259281313}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8568544426719, "res": {"Yes": 0.8568544426719, "yes": 0.13317428434332113}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7657772330576754, "res": {"Yes": 0.7657772330576754, "yes": 0.21558312779285532}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8362382236254935, "res": {"Yes": 0.8362382236254935, "yes": 0.14773016186025933}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6733633089522421, "res": {"Yes": 0.6733633089522421, "yes": 0.31894972196954763}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7631862692763558, "res": {"Yes": 0.7631862692763558, "yes": 0.22775331215636535}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6790216614120295, "res": {"Yes": 0.6790216614120295, "yes": 0.3136231415564484}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7034880423095085, "res": {"Yes": 0.7034880423095085, "yes": 0.2751590458360153}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7387674644498575, "res": {"Yes": 0.7387674644498575, "yes": 0.25548780510699304}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8337129004114225, "res": {"Yes": 0.8337129004114225, "yes": 0.1608688085336141}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9711944153022654, "res": {"Yes": 0.9711944153022654, "yes": 0.024036508907848718}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978506827161184, "res": {"Yes": 0.978506827161184, "yes": 0.018583871464469222}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.980013192179961, "res": {"Yes": 0.980013192179961, "yes": 0.015544361665009781}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8732466426179837, "res": {"Yes": 0.8732466426179837, "yes": 0.12071848672664627}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.976807134754592, "res": {"Yes": 0.976807134754592, "yes": 0.012946316130406026}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.806515010247389, "res": {"Yes": 0.806515010247389, "yes": 0.18527530962159566}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9008371151629141, "res": {"Yes": 0.9008371151629141, "yes": 0.09326766513074403}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9466052266757856, "res": {"Yes": 0.9466052266757856, "yes": 0.04822580633792039}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7569507175316006, "res": {"Yes": 0.7569507175316006, "yes": 0.23516088078122063}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8225714342186522, "res": {"Yes": 0.8225714342186522, "yes": 0.16652990101658868}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6060076555935267, "res": {"Yes": 0.6060076555935267, "yes": 0.3692032217837472}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8747976644441656, "res": {"Yes": 0.8747976644441656, "yes": 0.11759139711235331}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7662726979881025, "res": {"Yes": 0.7662726979881025, "yes": 0.2254875121646767}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.948160176923768, "res": {"Yes": 0.948160176923768, "yes": 0.042993772472406905}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7997043836829558, "res": {"Yes": 0.7997043836829558, "yes": 0.1918997651084956}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9586780353792675, "res": {"Yes": 0.9586780353792675, "yes": 0.03130373373162573}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8435747348768161, "res": {"Yes": 0.8435747348768161, "yes": 0.14978052844887424}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8552673233706056, "res": {"Yes": 0.8552673233706056, "yes": 0.13952390924073707}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9002123311350554, "res": {"Yes": 0.9002123311350554, "yes": 0.09336418178177058}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9806273686974882, "res": {"Yes": 0.9806273686974882, "yes": 0.019062313657624817}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9075269421175662, "res": {"Yes": 0.9075269421175662, "yes": 0.08565771349056943}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9386345032151987, "res": {"Yes": 0.9386345032151987, "yes": 0.054172070921433606}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8870182250857903, "res": {"Yes": 0.8870182250857903, "yes": 0.10249334595736782}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8009248151917183, "res": {"Yes": 0.8009248151917183, "yes": 0.18982294684166365}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8990649771330216, "res": {"Yes": 0.8990649771330216, "yes": 0.09394658527293187}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8039790800339213, "res": {"Yes": 0.8039790800339213, "yes": 0.18650362557626707}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8720007543441195, "res": {"Yes": 0.8720007543441195, "yes": 0.11920843064074808}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8254379871552907, "res": {"Yes": 0.8254379871552907, "yes": 0.16493589429483915}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.886094947409186, "res": {"Yes": 0.886094947409186, "yes": 0.10261207121483762}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.938956834949829, "res": {"Yes": 0.938956834949829, "yes": 0.05426536504644973}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8132893977352978, "res": {"Yes": 0.8132893977352978, "yes": 0.1767135533490631}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9402774732198107, "res": {"Yes": 0.9402774732198107, "yes": 0.052810597404003616}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8976015521435637, "res": {"Yes": 0.8976015521435637, "yes": 0.09277222347463936}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.40884394022317955, "res": {"yes": 0.5531294267987453, "Yes": 0.40884394022317955}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5283372542885835, "res": {"Yes": 0.5283372542885835, "yes": 0.46225268508626227}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5003440385882558, "res": {"Yes": 0.5003440385882558, "yes": 0.4779117437712075}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.528094729202394, "res": {"Yes": 0.528094729202394, "yes": 0.46210307810729573}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5608552657841838, "res": {"Yes": 0.5608552657841838, "yes": 0.4214857120413769}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.784168490790005, "res": {"Yes": 0.784168490790005, "yes": 0.21331007941072258}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8107625049379974, "res": {"Yes": 0.8107625049379974, "yes": 0.18199140832957197}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.750550756461677, "res": {"Yes": 0.750550756461677, "yes": 0.24694918308964073}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.781546476499698, "res": {"Yes": 0.781546476499698, "yes": 0.2079671384465533}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7893387573714886, "res": {"Yes": 0.7893387573714886, "yes": 0.20800717597419266}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.888612541327121, "res": {"Yes": 0.888612541327121, "yes": 0.10593134324050586}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6972283663934925, "res": {"Yes": 0.6972283663934925, "yes": 0.29804490974176145}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9808085308395261, "res": {"Yes": 0.9808085308395261, "yes": 0.013688441340979952}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8981495300586666, "res": {"Yes": 0.8981495300586666, "yes": 0.09519641456751379}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8212087818261945, "res": {"Yes": 0.8212087818261945, "yes": 0.17540008753234898}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9537438830328507, "res": {"Yes": 0.9537438830328507, "yes": 0.041470759103845505}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9486290708034117, "res": {"Yes": 0.9486290708034117, "yes": 0.04522752126004427}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9289737284078292, "res": {"Yes": 0.9289737284078292, "yes": 0.06490048384896244}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707061156854877, "res": {"Yes": 0.9707061156854877, "yes": 0.024697024702313376}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9498588949984852, "res": {"Yes": 0.9498588949984852, "yes": 0.04393141549109028}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9025598366280544, "res": {"Yes": 0.9025598366280544, "yes": 0.08978436751074263}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9222917304559033, "res": {"Yes": 0.9222917304559033, "yes": 0.07190103639241367}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.85427745431633, "res": {"Yes": 0.85427745431633, "yes": 0.13762732367176153}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8260464848388829, "res": {"Yes": 0.8260464848388829, "yes": 0.16114938291282072}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9395575368626001, "res": {"Yes": 0.9395575368626001, "yes": 0.05512940715614705}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8301663969762008, "res": {"Yes": 0.8301663969762008, "yes": 0.16479791718947684}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7449446937018988, "res": {"Yes": 0.7449446937018988, "yes": 0.24980379231519284}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8253280296310751, "res": {"Yes": 0.8253280296310751, "yes": 0.17150442687964232}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8381138403948245, "res": {"Yes": 0.8381138403948245, "yes": 0.15619140011100766}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8635120643842091, "res": {"Yes": 0.8635120643842091, "yes": 0.13072714376481848}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9325349856916183, "res": {"Yes": 0.9325349856916183, "yes": 0.057665540629453005}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.820539826766416, "res": {"Yes": 0.820539826766416, "yes": 0.16419595776821358}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8332108230011138, "res": {"Yes": 0.8332108230011138, "yes": 0.14919043527549625}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8747587018238953, "res": {"Yes": 0.8747587018238953, "yes": 0.10790744608178927}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8635735831749491, "res": {"Yes": 0.8635735831749491, "yes": 0.11367088268320463}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7763795248063959, "res": {"Yes": 0.7763795248063959, "yes": 0.2169987903242157}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8629124081320405, "res": {"Yes": 0.8629124081320405, "yes": 0.12284293387306021}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7781509114718433, "res": {"Yes": 0.7781509114718433, "yes": 0.21495220024802592}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6994116621396205, "res": {"Yes": 0.6994116621396205, "yes": 0.27735372552359727}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8427416609187374, "res": {"Yes": 0.8427416609187374, "yes": 0.14984409381153313}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7517304976880468, "res": {"Yes": 0.7517304976880468, "yes": 0.2414437867656254}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8391113364066175, "res": {"Yes": 0.8391113364066175, "yes": 0.15308643231558014}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9523147929356236, "res": {"Yes": 0.9523147929356236, "yes": 0.043832847488780614}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8443468998936016, "res": {"Yes": 0.8443468998936016, "yes": 0.1525773497572508}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.837097154966564, "res": {"Yes": 0.837097154966564, "yes": 0.1563881532849077}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.72553024430069, "res": {"Yes": 0.72553024430069, "yes": 0.2517707066303088}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8868781138837483, "res": {"Yes": 0.8868781138837483, "yes": 0.10185571764358668}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8891904650635735, "res": {"Yes": 0.8891904650635735, "yes": 0.09918155693203984}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.907077291363432, "res": {"Yes": 0.907077291363432, "yes": 0.08347296266326074}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8836906597452534, "res": {"Yes": 0.8836906597452534, "yes": 0.09921564157040721}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9660984324640959, "res": {"Yes": 0.9660984324640959, "yes": 0.01951137597853168}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7920089790877259, "res": {"Yes": 0.7920089790877259, "yes": 0.19712181324366157}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9550725316630717, "res": {"Yes": 0.9550725316630717, "yes": 0.026585178971509246}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9714470208520677, "res": {"Yes": 0.9714470208520677, " Yes": 0.01638717929799847}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9555509670621405, "res": {"Yes": 0.9555509670621405, "yes": 0.026761954721392842}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9710822099817475, "res": {"Yes": 0.9710822099817475, "yes": 0.023064929927998763}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9669349777495764, "res": {"Yes": 0.9669349777495764, "yes": 0.024957542446596405}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.978923517096854, "res": {"Yes": 0.978923517096854, "yes": 0.013235187187939894}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7808101378818594, "res": {"Yes": 0.7808101378818594, "yes": 0.21213296659470424}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6885420623098422, "res": {"Yes": 0.6885420623098422, "yes": 0.2999607038716484}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.887386981970612, "res": {"Yes": 0.887386981970612, "yes": 0.10853238188240981}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6962458557517567, "res": {"Yes": 0.6962458557517567, "yes": 0.2975179261039989}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.884677882327755, "res": {"Yes": 0.884677882327755, "yes": 0.10683242432329787}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7783034595642601, "res": {"Yes": 0.7783034595642601, "yes": 0.21456237292844943}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6998612695061792, "res": {"Yes": 0.6998612695061792, "yes": 0.289482104233197}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8143271251992952, "res": {"Yes": 0.8143271251992952, "yes": 0.1800211562997522}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8838501573255305, "res": {"Yes": 0.8838501573255305, "yes": 0.10869950920484363}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8996233349343599, "res": {"Yes": 0.8996233349343599, "yes": 0.09284251577303214}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9189552151209756, "res": {"Yes": 0.9189552151209756, "yes": 0.075511531446435}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7789223088381736, "res": {"Yes": 0.7789223088381736, "yes": 0.212585546470925}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9740592836708848, "res": {"Yes": 0.9740592836708848, "yes": 0.019181966233501997}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9724770176551398, "res": {"Yes": 0.9724770176551398, "yes": 0.023095478882089345}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9828107364951935, "res": {"Yes": 0.9828107364951935, "yes": 0.012432922306566705}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769361394459283, "res": {"Yes": 0.9769361394459283, "yes": 0.021000015566438768}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.743558190878482, "res": {"Yes": 0.743558190878482, "yes": 0.25306497627669833}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8433788964306373, "res": {"Yes": 0.8433788964306373, "yes": 0.14874387676629786}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.917146031703938, "res": {"Yes": 0.917146031703938, "yes": 0.07623967366576158}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9512148422654095, "res": {"Yes": 0.9512148422654095, "yes": 0.04338345203939201}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8560067220577716, "res": {"Yes": 0.8560067220577716, "yes": 0.13352961373649358}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9238547561592366, "res": {"Yes": 0.9238547561592366, "yes": 0.0696426577973967}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.524579625744451, "res": {"Yes": 0.524579625744451, "yes": 0.4659962463182431}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7297566693966411, "res": {"Yes": 0.7297566693966411, "yes": 0.26769982022768607}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5304304140665655, "res": {"Yes": 0.5304304140665655, "yes": 0.44649766868026924}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6549034361109131, "res": {"Yes": 0.6549034361109131, "yes": 0.3377586810099625}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7799932524820151, "res": {"Yes": 0.7799932524820151, "yes": 0.21682879499360966}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6583859957302706, "res": {"Yes": 0.6583859957302706, "yes": 0.32937373509917317}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8018027331594354, "res": {"Yes": 0.8018027331594354, "yes": 0.1848289086352196}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8561812025676266, "res": {"Yes": 0.8561812025676266, "yes": 0.13460947102764045}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9003914732069583, "res": {"Yes": 0.9003914732069583, "yes": 0.09161510675444083}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7889052374306796, "res": {"Yes": 0.7889052374306796, "yes": 0.2039600372829087}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7873839926927575, "res": {"Yes": 0.7873839926927575, "yes": 0.20657397464581892}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8148038414891611, "res": {"Yes": 0.8148038414891611, "yes": 0.17885692400363576}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8447774914741218, "res": {"Yes": 0.8447774914741218, "yes": 0.15003213117376524}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7988767663239175, "res": {"Yes": 0.7988767663239175, "yes": 0.19050559781337398}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8480808633605469, "res": {"Yes": 0.8480808633605469, "yes": 0.14688550106892312}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8574992796506037, "res": {"Yes": 0.8574992796506037, "yes": 0.1347542270650603}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9203901985477693, "res": {"Yes": 0.9203901985477693, "yes": 0.07049796745535608}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9047370807492537, "res": {"Yes": 0.9047370807492537, "yes": 0.08928377486937174}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9344276370717057, "res": {"Yes": 0.9344276370717057, "yes": 0.058912122179577434}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9322538128387067, "res": {"Yes": 0.9322538128387067, "yes": 0.06105662907275273}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8378611117538308, "res": {"Yes": 0.8378611117538308, "yes": 0.1570251963821346}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8567437356617428, "res": {"Yes": 0.8567437356617428, "yes": 0.1361275860904267}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8897517168559916, "res": {"Yes": 0.8897517168559916, "yes": 0.1046393607847851}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947361495705541, "res": {"Yes": 0.9947361495705541, "yes": 0.0019680802573693347}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7576391667431345, "res": {"Yes": 0.7576391667431345, "yes": 0.23516295020607697}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7624968665131234, "res": {"Yes": 0.7624968665131234, "yes": 0.21374894646417225}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7117863804822216, "res": {"Yes": 0.7117863804822216, "yes": 0.2494369502835544}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7547655935943531, "res": {"Yes": 0.7547655935943531, "yes": 0.19461985200873008}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8799408761149432, "res": {"Yes": 0.8799408761149432, "yes": 0.09271005908996538}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8093620210428359, "res": {"Yes": 0.8093620210428359, "yes": 0.1741327394853541}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9128580289368455, "res": {"Yes": 0.9128580289368455, "yes": 0.07217655563580135}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9239147901917765, "res": {"Yes": 0.9239147901917765, "yes": 0.06644996636453906}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9098029057659434, "res": {"Yes": 0.9098029057659434, "yes": 0.07800633013566756}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9237004298749061, "res": {"Yes": 0.9237004298749061, "yes": 0.06438123952724911}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9136790883669019, "res": {"Yes": 0.9136790883669019, "yes": 0.0729301095399209}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5799541095717237, "res": {"Yes": 0.5799541095717237, "yes": 0.2070816746520536}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6926434253130731, "res": {"Yes": 0.6926434253130731, "yes": 0.27381871928566376}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6445811970349218, "res": {"Yes": 0.6445811970349218, "yes": 0.30292328207616537}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9599099797603565, "res": {"Yes": 0.9599099797603565, "yes": 0.03338034250554038}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5879090969844935, "res": {"Yes": 0.5879090969844935, "yes": 0.36567657906448}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6586411697403876, "res": {"Yes": 0.6586411697403876, "yes": 0.25255361908462326}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7033469015942347, "res": {"Yes": 0.7033469015942347, "yes": 0.26706118220414743}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6775735305095827, "res": {"Yes": 0.6775735305095827, "yes": 0.17854707268633369}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6196632274820919, "res": {"Yes": 0.6196632274820919, "yes": 0.276287386378606}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7796465211714476, "res": {"Yes": 0.7796465211714476, "yes": 0.15833681094399452}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8647998985218993, "res": {"Yes": 0.8647998985218993, "yes": 0.12272091320748854}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8211804916709613, "res": {"Yes": 0.8211804916709613, "yes": 0.17317268554431417}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.875668731460196, "res": {"Yes": 0.875668731460196, "yes": 0.11778618378495907}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8338767244156385, "res": {"Yes": 0.8338767244156385, "yes": 0.15456091322322904}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8040056680617286, "res": {"Yes": 0.8040056680617286, "yes": 0.18627108873963888}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9231987566612055, "res": {"Yes": 0.9231987566612055, "yes": 0.07242977552347403}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7833373522447425, "res": {"Yes": 0.7833373522447425, "yes": 0.20896895055323275}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8359239658281183, "res": {"Yes": 0.8359239658281183, "yes": 0.15954893520799857}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7920599861083876, "res": {"Yes": 0.7920599861083876, "yes": 0.20321190040651102}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827437238642251, "res": {"Yes": 0.9827437238642251, "yes": 0.01384838998051787}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8503920334513364, "res": {"Yes": 0.8503920334513364, "yes": 0.14364426755382137}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7402438056142404, "res": {"Yes": 0.7402438056142404, "yes": 0.2563814511400701}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9098023098452352, "res": {"Yes": 0.9098023098452352, "yes": 0.08551672569561576}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8276340424625873, "res": {"Yes": 0.8276340424625873, "yes": 0.16828217423536782}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8542588398134061, "res": {"Yes": 0.8542588398134061, "yes": 0.1420792895645906}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8855851287645382, "res": {"Yes": 0.8855851287645382, "yes": 0.10997656294802129}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6985865147814863, "res": {"Yes": 0.6985865147814863, "yes": 0.29497561118769877}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8767248200681377, "res": {"Yes": 0.8767248200681377, "yes": 0.11831145294049845}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9134335573168276, "res": {"Yes": 0.9134335573168276, "yes": 0.07909742171491546}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8225801452962657, "res": {"Yes": 0.8225801452962657, "yes": 0.17153715304626796}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7798533732362288, "res": {"Yes": 0.7798533732362288, "yes": 0.21477304780249287}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8640023675171061, "res": {"Yes": 0.8640023675171061, "yes": 0.13195607195523104}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9504078468900117, "res": {"Yes": 0.9504078468900117, "yes": 0.04195650468684131}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8440802972413897, "res": {"Yes": 0.8440802972413897, "yes": 0.1503961749955862}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8510717171396311, "res": {"Yes": 0.8510717171396311, "yes": 0.144544609906132}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8386049009985764, "res": {"Yes": 0.8386049009985764, "yes": 0.15241879752436904}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9792641722356984, "res": {"Yes": 0.9792641722356984, "yes": 0.01581111192099656}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8825653075705158, "res": {"Yes": 0.8825653075705158, "yes": 0.10611544221338076}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9018169116096003, "res": {"Yes": 0.9018169116096003, "yes": 0.09210733474194252}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8797110303842235, "res": {"Yes": 0.8797110303842235, "yes": 0.11065328773531706}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7966987114151293, "res": {"Yes": 0.7966987114151293, "yes": 0.197477544468271}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8904084579682933, "res": {"Yes": 0.8904084579682933, "yes": 0.10620273682777462}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8912677336037234, "res": {"Yes": 0.8912677336037234, "yes": 0.10290003458903131}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9052832598004983, "res": {"Yes": 0.9052832598004983, "yes": 0.08804679891590474}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9241390238001967, "res": {"Yes": 0.9241390238001967, "yes": 0.07117793573164159}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7374058646997967, "res": {"Yes": 0.7374058646997967, "yes": 0.25404402505367857}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7450222837321785, "res": {"Yes": 0.7450222837321785, "yes": 0.2483380391134727}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6854495010612827, "res": {"Yes": 0.6854495010612827, "yes": 0.2897444938641868}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4904250082718454, "res": {"yes": 0.5026016856592183, "Yes": 0.4904250082718454}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6757962187081549, "res": {"Yes": 0.6757962187081549, "yes": 0.31764346640998087}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5711687379124295, "res": {"Yes": 0.5711687379124295, "yes": 0.36993667005768927}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6108744828284458, "res": {"Yes": 0.6108744828284458, "\u064a": 0.14134794971086262}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8729076966034545, "res": {"Yes": 0.8729076966034545, "yes": 0.12421027294258767}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8641016211695871, "res": {"Yes": 0.8641016211695871, "yes": 0.13173141027473204}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8315055547288759, "res": {"Yes": 0.8315055547288759, "yes": 0.15487477373287492}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9835402585799481, "res": {"Yes": 0.9835402585799481, "yes": 0.01160525344733036}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9688123878760652, "res": {"Yes": 0.9688123878760652, "yes": 0.025009848292116287}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8060322861658907, "res": {"Yes": 0.8060322861658907, "yes": 0.18929407893377026}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7162762087497193, "res": {"Yes": 0.7162762087497193, "yes": 0.2765683752952819}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9734316034847069, "res": {"Yes": 0.9734316034847069, "yes": 0.023940123982045345}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8850536626940484, "res": {"Yes": 0.8850536626940484, "yes": 0.10993319674397219}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8924399772239747, "res": {"Yes": 0.8924399772239747, "yes": 0.102808247559628}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8316429640591855, "res": {"Yes": 0.8316429640591855, "yes": 0.16545718806761192}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9229558857886192, "res": {"Yes": 0.9229558857886192, "yes": 0.07369497234178647}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7877474779001167, "res": {"Yes": 0.7877474779001167, "yes": 0.2003738314269112}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9416720700311417, "res": {"Yes": 0.9416720700311417, "yes": 0.05347863770785523}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9352294955735906, "res": {"Yes": 0.9352294955735906, "yes": 0.05806525585146829}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8932061800865911, "res": {"Yes": 0.8932061800865911, "yes": 0.10014052794751677}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9129566868349163, "res": {"Yes": 0.9129566868349163, "yes": 0.07756335443454891}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8884421084588192, "res": {"Yes": 0.8884421084588192, "yes": 0.08496913551678706}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9885673966176634, "res": {"Yes": 0.9885673966176634, "yes": 0.009675058437817855}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8094534760242454, "res": {"Yes": 0.8094534760242454, "yes": 0.18393230438869276}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.834953987867401, "res": {"Yes": 0.834953987867401, "yes": 0.16200080088943966}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9651932621855301, "res": {"Yes": 0.9651932621855301, "yes": 0.029431383735091812}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.843368463898214, "res": {"Yes": 0.843368463898214, "yes": 0.14698994439784463}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6486378307688763, "res": {"Yes": 0.6486378307688763, "yes": 0.3424316168659066}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6841409878583529, "res": {"Yes": 0.6841409878583529, "yes": 0.2961656952580424}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6211695729451637, "res": {"Yes": 0.6211695729451637, "yes": 0.36950786991424767}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8111985610502773, "res": {"Yes": 0.8111985610502773, "yes": 0.17585308092991728}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7006207648437717, "res": {"Yes": 0.7006207648437717, "yes": 0.2919079401155211}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8498310108737618, "res": {"Yes": 0.8498310108737618, "yes": 0.14170537510680006}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9677655852789647, "res": {"Yes": 0.9677655852789647, "yes": 0.020997308838884593}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3763742819450225, "res": {"yes": 0.6189369743369855, "Yes": 0.3763742819450225}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5877654765793013, "res": {"Yes": 0.5877654765793013, "yes": 0.4064190535831787}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9518369670071649, "res": {"Yes": 0.9518369670071649, "yes": 0.04044801538875779}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9857031141375194, "res": {"Yes": 0.9857031141375194, "yes": 0.010030826275425762}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9571123675158074, "res": {"Yes": 0.9571123675158074, "yes": 0.025842144810758787}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8910996741813414, "res": {"Yes": 0.8910996741813414, "yes": 0.10368140449800183}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9036507877124906, "res": {"Yes": 0.9036507877124906, "yes": 0.0902943851904658}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8663156889096019, "res": {"Yes": 0.8663156889096019, "yes": 0.12906595838272933}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8950914071754489, "res": {"Yes": 0.8950914071754489, "yes": 0.09648143235302703}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6934303686250269, "res": {"Yes": 0.6934303686250269, "yes": 0.2987057810240759}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985010761981689, "res": {"Yes": 0.985010761981689, "yes": 0.008921341065721036}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7768562128393134, "res": {"Yes": 0.7768562128393134, "yes": 0.2097340666601104}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6488443185637887, "res": {"Yes": 0.6488443185637887, "yes": 0.3278841940045962}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9099656567668996, "res": {"Yes": 0.9099656567668996, "yes": 0.08862348400522378}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7521625511101494, "res": {"Yes": 0.7521625511101494, "yes": 0.23834056089082414}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9723928213225583, "res": {"Yes": 0.9723928213225583, "yes": 0.02465498420369566}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.778422338929631, "res": {"Yes": 0.778422338929631, "yes": 0.2135802155533184}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8447996080383532, "res": {"Yes": 0.8447996080383532, "yes": 0.14996380710000473}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.811683648642719, "res": {"Yes": 0.811683648642719, "yes": 0.18180919881072158}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8534475341917507, "res": {"Yes": 0.8534475341917507, "yes": 0.14036626416335166}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8020726573657355, "res": {"Yes": 0.8020726573657355, "yes": 0.19382044301017687}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8704578947365029, "res": {"Yes": 0.8704578947365029, "yes": 0.1196608724466638}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.85984593597323, "res": {"Yes": 0.85984593597323, "yes": 0.13105568103746054}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8673940215362508, "res": {"Yes": 0.8673940215362508, "yes": 0.1263902672295794}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8049150057959583, "res": {"Yes": 0.8049150057959583, "yes": 0.18191209556215462}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.90068409395178, "res": {"Yes": 0.90068409395178, "yes": 0.09404903347522733}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9062014469751889, "res": {"Yes": 0.9062014469751889, "yes": 0.08237279947677832}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5944484132234072, "res": {"Yes": 0.5944484132234072, "yes": 0.39990606378140564}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7335204879673678, "res": {"Yes": 0.7335204879673678, "yes": 0.2567704201840379}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6804531198539424, "res": {"Yes": 0.6804531198539424, "yes": 0.3026979909729833}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6783046606357307, "res": {"Yes": 0.6783046606357307, "yes": 0.3150996944221261}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8324968341146952, "res": {"Yes": 0.8324968341146952, "yes": 0.15803309592669992}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8572574989464238, "res": {"Yes": 0.8572574989464238, "yes": 0.1304579273481114}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8305295247406561, "res": {"Yes": 0.8305295247406561, "yes": 0.16181949392531875}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9790690742658278, "res": {"Yes": 0.9790690742658278, "yes": 0.01434707773033241}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8486683372965612, "res": {"Yes": 0.8486683372965612, "yes": 0.14591837338864663}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9214940672843845, "res": {"Yes": 0.9214940672843845, "yes": 0.0720701736922092}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.821197284983727, "res": {"Yes": 0.821197284983727, "yes": 0.17203218458382247}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9848315230779027, "res": {"Yes": 0.9848315230779027, "yes": 0.011036708783672381}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8639806640502307, "res": {"Yes": 0.8639806640502307, "yes": 0.12164893067110431}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9750688988439529, "res": {"Yes": 0.9750688988439529, "yes": 0.02167830474254718}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9360473604624047, "res": {"Yes": 0.9360473604624047, "yes": 0.06053639043363426}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9271888189223717, "res": {"Yes": 0.9271888189223717, "yes": 0.06797624316968118}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6554136866759059, "res": {"Yes": 0.6554136866759059, "yes": 0.33152372609060726}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7275814408619535, "res": {"Yes": 0.7275814408619535, "yes": 0.26665305100956266}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7914942455476555, "res": {"Yes": 0.7914942455476555, "yes": 0.20030972205890055}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8159036148916173, "res": {"Yes": 0.8159036148916173, "yes": 0.1748831399205754}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8985518653954256, "res": {"Yes": 0.8985518653954256, "yes": 0.08848497636203077}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.701920169558787, "res": {"Yes": 0.701920169558787, "yes": 0.2896608567024134}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6923049508614169, "res": {"Yes": 0.6923049508614169, "yes": 0.3007394811902535}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7867491212303389, "res": {"Yes": 0.7867491212303389, "yes": 0.20939488662870281}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7979805781391481, "res": {"Yes": 0.7979805781391481, "yes": 0.19344843264264924}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6108449721950155, "res": {"Yes": 0.6108449721950155, "yes": 0.3755562570343848}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.993188933378453, "res": {"Yes": 0.993188933378453, "yes": 0.0061638561433142915}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9839845305886037, "res": {"Yes": 0.9839845305886037, "yes": 0.012765796921980106}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9792798386292411, "res": {"Yes": 0.9792798386292411, "yes": 0.01760490891856962}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8509072634211463, "res": {"Yes": 0.8509072634211463, "yes": 0.1461822716446021}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885994425377895, "res": {"Yes": 0.9885994425377895, "yes": 0.00845054789559224}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9383985695443607, "res": {"Yes": 0.9383985695443607, "yes": 0.05433792832611859}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9324340218645549, "res": {"Yes": 0.9324340218645549, "yes": 0.05381817734570284}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9475396461486457, "res": {"Yes": 0.9475396461486457, "yes": 0.049111109846041244}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9471613359453309, "res": {"Yes": 0.9471613359453309, "yes": 0.04870844769371837}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9328735759959318, "res": {"Yes": 0.9328735759959318, "yes": 0.058590653014115576}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9062194715012226, "res": {"Yes": 0.9062194715012226, "yes": 0.09077802845717664}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9228385080316857, "res": {"Yes": 0.9228385080316857, "yes": 0.07331009191222393}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9495949136584922, "res": {"Yes": 0.9495949136584922, "yes": 0.047370874555214604}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9194669068138045, "res": {"Yes": 0.9194669068138045, "yes": 0.07455331290946572}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9123651913010103, "res": {"Yes": 0.9123651913010103, "yes": 0.08545311636881062}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.852095439699307, "res": {"Yes": 0.852095439699307, "yes": 0.14223372838074863}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.847274298203285, "res": {"Yes": 0.847274298203285, "yes": 0.14663779681663977}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8167109427245532, "res": {"Yes": 0.8167109427245532, "yes": 0.1772692350738792}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8453248649051441, "res": {"Yes": 0.8453248649051441, "yes": 0.1506019824536695}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.838830046315853, "res": {"Yes": 0.838830046315853, "yes": 0.15286460255661785}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5346151856769301, "res": {"Yes": 0.5346151856769301, "yes": 0.430873541122243}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.49505008980881743, "res": {"yes": 0.4978063962975088, "Yes": 0.49505008980881743}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8499606029861514, "res": {"Yes": 0.8499606029861514, "yes": 0.13712044209378405}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5784614340491178, "res": {"Yes": 0.5784614340491178, "yes": 0.39530379832925366}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.82823463428465, "res": {"Yes": 0.82823463428465, "yes": 0.16045513631555516}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9437362623265764, "res": {"Yes": 0.9437362623265764, "yes": 0.05172409251096699}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9254470601086009, "res": {"Yes": 0.9254470601086009, "yes": 0.06963334719647676}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9237362424347944, "res": {"Yes": 0.9237362424347944, "yes": 0.07329825328837472}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9404965297881115, "res": {"Yes": 0.9404965297881115, "yes": 0.0545913409209272}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9763947513401046, "res": {"Yes": 0.9763947513401046, "yes": 0.018480767171887312}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9214145126068572, "res": {"Yes": 0.9214145126068572, "yes": 0.07079800901039354}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9175958691273155, "res": {"Yes": 0.9175958691273155, "yes": 0.07553823704522607}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6312182289723584, "res": {"Yes": 0.6312182289723584, "yes": 0.36099336511535407}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8332510513907537, "res": {"Yes": 0.8332510513907537, "yes": 0.15708834890956458}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8956439734789522, "res": {"Yes": 0.8956439734789522, "yes": 0.09830933420793533}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6375502509732557, "res": {"Yes": 0.6375502509732557, "yes": 0.35109385433188883}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9779147973660893, "res": {"Yes": 0.9779147973660893, "yes": 0.016300126670553736}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8047393200752587, "res": {"Yes": 0.8047393200752587, "yes": 0.18867617371520484}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9101683505293822, "res": {"Yes": 0.9101683505293822, "yes": 0.08472692848910494}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.952498398831016, "res": {"Yes": 0.952498398831016, "yes": 0.04564332517714903}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8849096142330738, "res": {"Yes": 0.8849096142330738, "yes": 0.10357007937414679}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8010084681476982, "res": {"Yes": 0.8010084681476982, "yes": 0.19255541433589246}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7262294407461409, "res": {"Yes": 0.7262294407461409, "yes": 0.2689648707273025}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9817044012494364, "res": {"Yes": 0.9817044012494364, "yes": 0.010463063349296075}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7168509213606283, "res": {"Yes": 0.7168509213606283, "yes": 0.2764125452759898}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6763915869605173, "res": {"Yes": 0.6763915869605173, "yes": 0.31834321046885844}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9834511223985887, "res": {"Yes": 0.9834511223985887, "yes": 0.01454143672296603}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9100379154586878, "res": {"Yes": 0.9100379154586878, "yes": 0.08324193763804756}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9314678069293866, "res": {"Yes": 0.9314678069293866, "yes": 0.06539942582446205}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.97719914358088, "res": {"Yes": 0.97719914358088, "yes": 0.01762314824353553}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8986966517356525, "res": {"Yes": 0.8986966517356525, "yes": 0.09288436003744426}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8728466737594632, "res": {"Yes": 0.8728466737594632, "yes": 0.11119914344811978}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8473987465181004, "res": {"Yes": 0.8473987465181004, "yes": 0.13617833029735815}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9320266689059349, "res": {"Yes": 0.9320266689059349, "yes": 0.058653138086666744}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.867036139221959, "res": {"Yes": 0.867036139221959, "yes": 0.11116047511084215}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8151469298219474, "res": {"Yes": 0.8151469298219474, "yes": 0.180164564236806}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8761533555099728, "res": {"Yes": 0.8761533555099728, "yes": 0.1113927538315482}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8806467396669803, "res": {"Yes": 0.8806467396669803, "yes": 0.11292289932561693}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8065595956294841, "res": {"Yes": 0.8065595956294841, "yes": 0.1871284873393441}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8676515196431267, "res": {"Yes": 0.8676515196431267, "yes": 0.12747370129653157}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7450970057638877, "res": {"Yes": 0.7450970057638877, "yes": 0.24928745764675464}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.71357576492592, "res": {"Yes": 0.71357576492592, "yes": 0.28036226737861814}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5427487355154056, "res": {"Yes": 0.5427487355154056, "yes": 0.44626904452151667}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7086293483756224, "res": {"Yes": 0.7086293483756224, "yes": 0.2850477126267484}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7335800889282714, "res": {"Yes": 0.7335800889282714, "yes": 0.26003269652703626}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9197566488942561, "res": {"Yes": 0.9197566488942561, "yes": 0.0706195684059104}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9074511531697964, "res": {"Yes": 0.9074511531697964, "yes": 0.08142049958710544}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8640003371139281, "res": {"Yes": 0.8640003371139281, "yes": 0.1269354740481647}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8772660226954513, "res": {"Yes": 0.8772660226954513, "yes": 0.11243031179360964}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9304304448400577, "res": {"Yes": 0.9304304448400577, "yes": 0.05978997943758423}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7640025592634175, "res": {"Yes": 0.7640025592634175, "yes": 0.22732928906538333}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8224657076816878, "res": {"Yes": 0.8224657076816878, "yes": 0.16950183945137762}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8480085081880004, "res": {"Yes": 0.8480085081880004, "yes": 0.14394099792544207}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8290712173479103, "res": {"Yes": 0.8290712173479103, "yes": 0.15933294060245135}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7871981683798812, "res": {"Yes": 0.7871981683798812, "yes": 0.20102873920992206}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8581463353636267, "res": {"Yes": 0.8581463353636267, "yes": 0.1212132970174107}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9151172132485329, "res": {"Yes": 0.9151172132485329, "yes": 0.07231730706293787}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7314368146778724, "res": {"Yes": 0.7314368146778724, "yes": 0.253550896742294}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7648419636201379, "res": {"Yes": 0.7648419636201379, "yes": 0.20716665769624268}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8227920198688029, "res": {"Yes": 0.8227920198688029, "yes": 0.17403107565422277}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7253319259518733, "res": {"Yes": 0.7253319259518733, "yes": 0.2611780291927474}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.3886381515269526, "res": {"yes": 0.6011411580870523, "Yes": 0.3886381515269526}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4139555698991941, "res": {"yes": 0.5616688711361518, "Yes": 0.4139555698991941}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6461484712179753, "res": {"Yes": 0.6461484712179753, "yes": 0.33079897975899764}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7685203009668135, "res": {"Yes": 0.7685203009668135, "yes": 0.21905229230939857}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9340370485483515, "res": {"Yes": 0.9340370485483515, "yes": 0.06330425072708222}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.983996302065954, "res": {"Yes": 0.983996302065954, "yes": 0.012631065292211415}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9603468064874559, "res": {"Yes": 0.9603468064874559, "yes": 0.034427304292715705}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843620929282562, "res": {"Yes": 0.9843620929282562, "yes": 0.012403509775623217}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7833121291880936, "res": {"Yes": 0.7833121291880936, "yes": 0.21247020759489008}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9108587710771322, "res": {"Yes": 0.9108587710771322, "yes": 0.0810679894875135}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9502951257003543, "res": {"Yes": 0.9502951257003543, "yes": 0.048904938293650516}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7501086247042938, "res": {"Yes": 0.7501086247042938, "yes": 0.23566329707070557}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8548038513418272, "res": {"Yes": 0.8548038513418272, "yes": 0.13162622524070028}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7718383593195368, "res": {"Yes": 0.7718383593195368, " Yes": 0.1948811487158682}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7331978759348383, "res": {"Yes": 0.7331978759348383, "yes": 0.2580746744722673}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7765352045074881, "res": {"Yes": 0.7765352045074881, "yes": 0.2162617066148998}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8034326645435754, "res": {"Yes": 0.8034326645435754, "yes": 0.191840131928976}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789203150432666, "res": {"Yes": 0.9789203150432666, "yes": 0.01697750578892658}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.983772555925036, "res": {"Yes": 0.983772555925036, "yes": 0.011970554935027826}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8969745138323088, "res": {"Yes": 0.8969745138323088, "yes": 0.10090952052784924}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9464215320922582, "res": {"Yes": 0.9464215320922582, "yes": 0.05061842188865419}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8264596609419026, "res": {"Yes": 0.8264596609419026, "yes": 0.1700297427299683}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8408403588804215, "res": {"Yes": 0.8408403588804215, "yes": 0.15591928647785172}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.760344339516167, "res": {"Yes": 0.760344339516167, "yes": 0.23641259890734245}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7494108083912815, "res": {"Yes": 0.7494108083912815, "yes": 0.2395813628975799}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6697923366088776, "res": {"Yes": 0.6697923366088776, "yes": 0.3217976973438347}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9614847449171198, "res": {"Yes": 0.9614847449171198, "yes": 0.035013660277311565}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8894618109167352, "res": {"Yes": 0.8894618109167352, "yes": 0.10660149957943933}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6465508044640512, "res": {"Yes": 0.6465508044640512, "yes": 0.35227667626968934}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8701533138872706, "res": {"Yes": 0.8701533138872706, "yes": 0.12397443549438054}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.967086573127499, "res": {"Yes": 0.967086573127499, "yes": 0.024348968299050675}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9678973410149968, "res": {"Yes": 0.9678973410149968, "yes": 0.02719705999340291}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9606309719256992, "res": {"Yes": 0.9606309719256992, "yes": 0.02537143358176307}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9764622625834776, "res": {"Yes": 0.9764622625834776, "yes": 0.020260463557377126}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6998105453996293, "res": {"Yes": 0.6998105453996293, "yes": 0.2760264181253988}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5421488197754031, "res": {"Yes": 0.5421488197754031, "yes": 0.4394534997812042}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5319750708728406, "res": {"Yes": 0.5319750708728406, "yes": 0.4442669493259448}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6111056814727729, "res": {"Yes": 0.6111056814727729, "yes": 0.3733117565450187}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8342081979066148, "res": {"Yes": 0.8342081979066148, "yes": 0.16359824209693152}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6824587659081175, "res": {"Yes": 0.6824587659081175, "yes": 0.3089159022902069}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6951539827805908, "res": {"Yes": 0.6951539827805908, "yes": 0.29649803140793707}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6920575237589626, "res": {"Yes": 0.6920575237589626, "yes": 0.2985764992765238}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9533164135674667, "res": {"Yes": 0.9533164135674667, "yes": 0.04164534422211943}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7606813455665676, "res": {"Yes": 0.7606813455665676, "yes": 0.2293960025762363}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7428284122746381, "res": {"Yes": 0.7428284122746381, "yes": 0.24604633675371027}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8280179964417864, "res": {"Yes": 0.8280179964417864, "yes": 0.16627015062902423}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8194929693743267, "res": {"Yes": 0.8194929693743267, "yes": 0.1725944424269194}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8582227052040364, "res": {"Yes": 0.8582227052040364, "yes": 0.1344295108794824}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7539900706199127, "res": {"Yes": 0.7539900706199127, "yes": 0.23021466022303677}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6058973480004041, "res": {"Yes": 0.6058973480004041, "yes": 0.39114152420421916}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9152582070668809, "res": {"Yes": 0.9152582070668809, "yes": 0.08042117233214101}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9548578239781512, "res": {"Yes": 0.9548578239781512, "yes": 0.0401797396181848}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719644551170233, "res": {"Yes": 0.9719644551170233, "yes": 0.024060446760708447}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5819350473326174, "res": {"Yes": 0.5819350473326174, "yes": 0.4143276885522662}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9134234182606134, "res": {"Yes": 0.9134234182606134, "yes": 0.0835650757802233}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8037053958698095, "res": {"Yes": 0.8037053958698095, "yes": 0.19097741576399058}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8370231672189732, "res": {"Yes": 0.8370231672189732, "yes": 0.15778096582406728}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5072697325489073, "res": {"Yes": 0.5072697325489073, "yes": 0.48767929575348107}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7711878272329595, "res": {"Yes": 0.7711878272329595, "yes": 0.22459439330781372}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7199749494556607, "res": {"Yes": 0.7199749494556607, "yes": 0.2660992551867415}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6390440699019551, "res": {"Yes": 0.6390440699019551, "yes": 0.34897279887620414}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5904569383802463, "res": {"Yes": 0.5904569383802463, "yes": 0.39862572695390763}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9630839360259638, "res": {"Yes": 0.9630839360259638, "yes": 0.03176652235071301}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8786476894534053, "res": {"Yes": 0.8786476894534053, "yes": 0.114707427621414}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.4154623011876291, "res": {"yes": 0.4984287891674775, "Yes": 0.4154623011876291}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5708930442035819, "res": {"Yes": 0.5708930442035819, "yes": 0.24193083486955855}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37594294091256203, "res": {"yes": 0.5013487912527249, "Yes": 0.37594294091256203}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.327676217471033, "res": {"yes": 0.6099582480359552, "Yes": 0.327676217471033}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.46011823036086824, "res": {"Yes": 0.46011823036086824, "yes": 0.4084979267378907}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9262516694484216, "res": {"Yes": 0.9262516694484216, "yes": 0.05290449352307351}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9205838229897706, "res": {"Yes": 0.9205838229897706, "yes": 0.06355506395132968}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9340370485483515, "res": {"Yes": 0.9340370485483515, "yes": 0.05577552086639109}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9452662151571161, "res": {"Yes": 0.9452662151571161, "yes": 0.04446884265927565}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9109525442599781, "res": {"Yes": 0.9109525442599781, "yes": 0.07784661988104251}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8931321899513465, "res": {"Yes": 0.8931321899513465, "yes": 0.09766588129633158}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.943236773027892, "res": {"Yes": 0.943236773027892, "yes": 0.05218767098589495}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8822667774020295, "res": {"Yes": 0.8822667774020295, "yes": 0.11256068281084197}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9087081349740087, "res": {"Yes": 0.9087081349740087, "yes": 0.08464305647940432}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9650115063842397, "res": {"Yes": 0.9650115063842397, "yes": 0.03378245142255773}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8152411988634428, "res": {"Yes": 0.8152411988634428, "yes": 0.17187967687885364}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8776038351389117, "res": {"Yes": 0.8776038351389117, "yes": 0.11103557872213657}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.901564537394367, "res": {"Yes": 0.901564537394367, "yes": 0.0938854365603843}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8375506579564568, "res": {"Yes": 0.8375506579564568, "yes": 0.15269376489117867}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8763338529293677, "res": {"Yes": 0.8763338529293677, "yes": 0.11315015919114012}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8888637520499914, "res": {"Yes": 0.8888637520499914, "yes": 0.10800153921162337}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8813517769057817, "res": {"Yes": 0.8813517769057817, "yes": 0.10949416706031372}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9911794642012675, "res": {"Yes": 0.9911794642012675, "yes": 0.006180517334079556}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9218473952849198, "res": {"Yes": 0.9218473952849198, "yes": 0.07092975910568337}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8765227495203994, "res": {"Yes": 0.8765227495203994, "yes": 0.11617174761151611}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9687570121426277, "res": {"Yes": 0.9687570121426277, "yes": 0.027612798499252248}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9602145595128441, "res": {"Yes": 0.9602145595128441, "yes": 0.033499307902211735}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9597928970820431, "res": {"Yes": 0.9597928970820431, "yes": 0.03664889504296027}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9527857375873771, "res": {"Yes": 0.9527857375873771, "yes": 0.04283430408965822}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9730558224128814, "res": {"Yes": 0.9730558224128814, "yes": 0.024409604428836326}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9310575857182892, "res": {"Yes": 0.9310575857182892, "yes": 0.06348778841514754}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.901631972435812, "res": {"Yes": 0.901631972435812, "yes": 0.09507428933298595}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9707742034002501, "res": {"Yes": 0.9707742034002501, "yes": 0.027394963400078656}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9433932388009343, "res": {"Yes": 0.9433932388009343, "yes": 0.04802340417149357}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6740487508820727, "res": {"Yes": 0.6740487508820727, "yes": 0.3175653674842471}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8130318616344231, "res": {"Yes": 0.8130318616344231, "yes": 0.18063884003855732}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9577946986621922, "res": {"Yes": 0.9577946986621922, "yes": 0.0326779480542267}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9754607602471924, "res": {"Yes": 0.9754607602471924, "yes": 0.016343287767972767}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7624701262169087, "res": {"Yes": 0.7624701262169087, "yes": 0.2300799780157317}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9687499654297503, "res": {"Yes": 0.9687499654297503, "yes": 0.021224954298055015}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9562998380282696, "res": {"Yes": 0.9562998380282696, "yes": 0.03864280032827352}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9667185729133574, "res": {"Yes": 0.9667185729133574, "yes": 0.030416002835868247}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9658166896923384, "res": {"Yes": 0.9658166896923384, "yes": 0.03132037608841864}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3790608396506645, "res": {"yes": 0.6162712940394435, "Yes": 0.3790608396506645}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3461415789161173, "res": {"yes": 0.6488087953098052, "Yes": 0.3461415789161173}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8407048599669801, "res": {"Yes": 0.8407048599669801, "yes": 0.15080139089638323}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9120398314737105, "res": {"Yes": 0.9120398314737105, "yes": 0.07685930779007753}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8740374021677256, "res": {"Yes": 0.8740374021677256, "yes": 0.11845302624395387}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9602696197942497, "res": {"Yes": 0.9602696197942497, "yes": 0.03488743650898369}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9726583412276343, "res": {"Yes": 0.9726583412276343, "yes": 0.022342177928676113}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6883877084831828, "res": {"Yes": 0.6883877084831828, "yes": 0.30055223895465266}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8841707171867756, "res": {"Yes": 0.8841707171867756, "yes": 0.11040034722390733}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5516360664766009, "res": {"Yes": 0.5516360664766009, "yes": 0.44539404244020436}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7834848528866978, "res": {"Yes": 0.7834848528866978, "yes": 0.2124536355650153}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7900780191339138, "res": {"Yes": 0.7900780191339138, "yes": 0.2029690042544773}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9197216528195702, "res": {"Yes": 0.9197216528195702, "yes": 0.06685705140464372}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.943234125365986, "res": {"Yes": 0.943234125365986, "yes": 0.04682607842269753}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8983433536751406, "res": {"Yes": 0.8983433536751406, "yes": 0.08766510932358705}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.912490636452552, "res": {"Yes": 0.912490636452552, "yes": 0.07106541415318572}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8875633055315925, "res": {"Yes": 0.8875633055315925, "yes": 0.10495425399470361}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8334625656811928, "res": {"Yes": 0.8334625656811928, "yes": 0.1597442468230866}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8887600676967627, "res": {"Yes": 0.8887600676967627, "yes": 0.07345120285365649}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7375280924765483, "res": {"Yes": 0.7375280924765483, "yes": 0.23403404096391892}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8856145838157558, "res": {"Yes": 0.8856145838157558, "yes": 0.09109546102972674}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6254785451394824, "res": {"Yes": 0.6254785451394824, "yes": 0.3400781278414925}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.595815394898112, "res": {"Yes": 0.595815394898112, "yes": 0.3698179061602707}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7844537368958544, "res": {"Yes": 0.7844537368958544, "yes": 0.20248254372063998}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7283440399998329, "res": {"Yes": 0.7283440399998329, "yes": 0.26343400441461884}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7691519536283881, "res": {"Yes": 0.7691519536283881, "yes": 0.21548424094020255}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3427815894806161, "res": {"yes": 0.6454834164759335, "Yes": 0.3427815894806161}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9668349022577205, "res": {"Yes": 0.9668349022577205, "yes": 0.030452310780634213}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7051452070016568, "res": {"Yes": 0.7051452070016568, "yes": 0.29139993741341563}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6956371157729726, "res": {"Yes": 0.6956371157729726, "yes": 0.29628477780152374}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5663020420035462, "res": {"Yes": 0.5663020420035462, "yes": 0.4234676733147731}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5528952592557127, "res": {"Yes": 0.5528952592557127, "yes": 0.43816019213612517}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9187324873725239, "res": {"Yes": 0.9187324873725239, "yes": 0.07603855991959614}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8778668660894619, "res": {"Yes": 0.8778668660894619, "yes": 0.10960393520036235}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7984031728358348, "res": {"Yes": 0.7984031728358348, "yes": 0.19706103024967667}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8400002328016456, "res": {"Yes": 0.8400002328016456, "yes": 0.15192512168600583}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9091828590173885, "res": {"Yes": 0.9091828590173885, "yes": 0.08536710839291112}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7194007105665603, "res": {"Yes": 0.7194007105665603, "yes": 0.2777700682908939}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5002932512426541, "res": {"Yes": 0.5002932512426541, "yes": 0.4938943999147706}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6178398731705151, "res": {"Yes": 0.6178398731705151, "yes": 0.37149649856373695}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7089059591243405, "res": {"Yes": 0.7089059591243405, "yes": 0.2869411891713188}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6985679815270939, "res": {"Yes": 0.6985679815270939, "yes": 0.2946070631060011}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9441435959981872, "res": {"Yes": 0.9441435959981872, "yes": 0.047073522077162436}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9743795865596572, "res": {"Yes": 0.9743795865596572, "yes": 0.02323834878690595}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701245166914455, "res": {"Yes": 0.9701245166914455, "yes": 0.02663519013764056}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489032480925594, "res": {"Yes": 0.9489032480925594, "yes": 0.0475078651283118}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795137959604471, "res": {"Yes": 0.9795137959604471, "yes": 0.014454904648106877}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6903497125521101, "res": {"Yes": 0.6903497125521101, "yes": 0.3013585883120256}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7225238855190982, "res": {"Yes": 0.7225238855190982, "yes": 0.2716274012857699}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6975481049461816, "res": {"Yes": 0.6975481049461816, "yes": 0.286163429370967}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.901093731736948, "res": {"Yes": 0.901093731736948, "yes": 0.0895579390019665}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824961429471141, "res": {"Yes": 0.9824961429471141, "yes": 0.011709388770024923}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7945858924802975, "res": {"Yes": 0.7945858924802975, "yes": 0.18791190808185343}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.924029390449992, "res": {"Yes": 0.924029390449992, "yes": 0.06584292484982542}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8180317409621495, "res": {"Yes": 0.8180317409621495, "yes": 0.1700709459281976}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8932723780495326, "res": {"Yes": 0.8932723780495326, "yes": 0.09086879736848673}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8582361364944724, "res": {"Yes": 0.8582361364944724, "yes": 0.13239858802497897}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9094838074279382, "res": {"Yes": 0.9094838074279382, "yes": 0.08563578792250831}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.872363573397236, "res": {"Yes": 0.872363573397236, "yes": 0.12030152252661543}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9205149290745005, "res": {"Yes": 0.9205149290745005, "yes": 0.07714031058185178}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8975806068557246, "res": {"Yes": 0.8975806068557246, "yes": 0.09687189920764956}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9491259220267325, "res": {"Yes": 0.9491259220267325, "yes": 0.046042030184474364}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8200004417536664, "res": {"Yes": 0.8200004417536664, "yes": 0.16932803668165872}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7250676631496632, "res": {"Yes": 0.7250676631496632, "yes": 0.26944014763380797}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6602395732855127, "res": {"Yes": 0.6602395732855127, "yes": 0.33321428415349386}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8573157601457453, "res": {"Yes": 0.8573157601457453, "yes": 0.13645420706775735}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8143371496279066, "res": {"Yes": 0.8143371496279066, "yes": 0.17896155012248846}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.867791361630197, "res": {"Yes": 0.867791361630197, "yes": 0.12092400361512841}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7755393548050272, "res": {"Yes": 0.7755393548050272, "yes": 0.21308080905209179}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9134981119473342, "res": {"Yes": 0.9134981119473342, "yes": 0.0819812841370667}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7436778390168289, "res": {"Yes": 0.7436778390168289, "yes": 0.25028723015602344}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8971217606955174, "res": {"Yes": 0.8971217606955174, "yes": 0.09679632986849934}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.843327257921738, "res": {"Yes": 0.843327257921738, "yes": 0.15446851342741405}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7482548129460392, "res": {"Yes": 0.7482548129460392, "yes": 0.24751989905081556}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7608260180938053, "res": {"Yes": 0.7608260180938053, "yes": 0.23151472513132323}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7096601397197342, "res": {"Yes": 0.7096601397197342, "yes": 0.2814148398238206}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9401844119818809, "res": {"Yes": 0.9401844119818809, "yes": 0.052742963968809856}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5039645251475632, "res": {"Yes": 0.5039645251475632, "yes": 0.39810911618666733}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.40249180201320445, "res": {"yes": 0.5294884644072458, "Yes": 0.40249180201320445}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5520984870781024, "res": {"Yes": 0.5520984870781024, "yes": 0.38536254980459933}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6688788676809854, "res": {"Yes": 0.6688788676809854, "yes": 0.2850501925526359}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6078900209337803, "res": {"Yes": 0.6078900209337803, "yes": 0.328263742736271}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8645706618086174, "res": {"Yes": 0.8645706618086174, "yes": 0.12553035213122696}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8969873675691887, "res": {"Yes": 0.8969873675691887, "yes": 0.08963713425943823}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8784517113725469, "res": {"Yes": 0.8784517113725469, "yes": 0.11066457494630404}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9268968384425064, "res": {"Yes": 0.9268968384425064, "yes": 0.06766783653928511}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8489888125212112, "res": {"Yes": 0.8489888125212112, "yes": 0.13759180664319154}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9178984879969798, "res": {"Yes": 0.9178984879969798, "yes": 0.07404158382707723}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9281368958460736, "res": {"Yes": 0.9281368958460736, "yes": 0.061487750676481975}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.926502712107323, "res": {"Yes": 0.926502712107323, "yes": 0.06585247276613586}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9131843701007152, "res": {"Yes": 0.9131843701007152, "yes": 0.07995160219024243}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8714568566081633, "res": {"Yes": 0.8714568566081633, "yes": 0.11625708385315395}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7619347628638982, "res": {"Yes": 0.7619347628638982, "yes": 0.22860766454274437}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6372493855166338, "res": {"Yes": 0.6372493855166338, "yes": 0.34997498446854663}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7934904772760779, "res": {"Yes": 0.7934904772760779, "yes": 0.1956407127786587}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8160984922623064, "res": {"Yes": 0.8160984922623064, "yes": 0.1738066160229394}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8529940132323698, "res": {"Yes": 0.8529940132323698, "yes": 0.14130341798825008}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8592767774416814, "res": {"Yes": 0.8592767774416814, "yes": 0.12396787742020547}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231042776424166, "res": {"Yes": 0.9231042776424166, "yes": 0.06505086998605425}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8074332175740958, "res": {"Yes": 0.8074332175740958, "yes": 0.18119481510381666}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9508619376009912, "res": {"Yes": 0.9508619376009912, "yes": 0.03847668521332582}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8667211415338708, "res": {"Yes": 0.8667211415338708, "yes": 0.11925379807921468}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8442214645928765, "res": {"Yes": 0.8442214645928765, "yes": 0.15011423621051526}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9826601539086907, "res": {"Yes": 0.9826601539086907, "yes": 0.011701473557773494}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7797279049226716, "res": {"Yes": 0.7797279049226716, "yes": 0.21183551036007964}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7879981790320193, "res": {"Yes": 0.7879981790320193, "yes": 0.19473208270080175}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8612263302937518, "res": {"Yes": 0.8612263302937518, "yes": 0.12542373429402398}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6449640178638968, "res": {"Yes": 0.6449640178638968, "yes": 0.30644744699112847}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.5225843880262516, "res": {"Yes": 0.5225843880262516, "yes": 0.4165742341514987}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6397645122166438, "res": {"Yes": 0.6397645122166438, "yes": 0.326995391976225}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7562795469902375, "res": {"Yes": 0.7562795469902375, "yes": 0.2139381980132513}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6958735195125269, "res": {"Yes": 0.6958735195125269, "yes": 0.3001117721142864}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9351367067063716, "res": {"Yes": 0.9351367067063716, "yes": 0.06050275970663322}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8925758615544392, "res": {"Yes": 0.8925758615544392, "yes": 0.09870866945268905}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9008892571241135, "res": {"Yes": 0.9008892571241135, "yes": 0.09377834623435682}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8779808908426368, "res": {"Yes": 0.8779808908426368, "yes": 0.1100460350810402}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8719867937237976, "res": {"Yes": 0.8719867937237976, "yes": 0.11681070037319516}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7739792999505338, "res": {"Yes": 0.7739792999505338, "yes": 0.2042045074077174}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7655083928995164, "res": {"Yes": 0.7655083928995164, "yes": 0.21831639831342337}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8582461607510882, "res": {"Yes": 0.8582461607510882, "yes": 0.1277357630428807}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216488728595632, "res": {"Yes": 0.9216488728595632, "yes": 0.06937277357799204}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8655371345252741, "res": {"Yes": 0.8655371345252741, "yes": 0.12349705121509634}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.848327190290246, "res": {"Yes": 0.848327190290246, "yes": 0.13916757450380207}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8412346372522941, "res": {"Yes": 0.8412346372522941, "yes": 0.13890507400158755}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7538084864405639, "res": {"Yes": 0.7538084864405639, "yes": 0.22936533438090642}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8678665069375696, "res": {"Yes": 0.8678665069375696, "yes": 0.11666947990337641}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9423172053955048, "res": {"Yes": 0.9423172053955048, "yes": 0.05178021253966603}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5644323179067998, "res": {"Yes": 0.5644323179067998, "yes": 0.4245854915440124}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.46676619490047444, "res": {"yes": 0.525002092859191, "Yes": 0.46676619490047444}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4438974551916938, "res": {"yes": 0.5488547414526506, "Yes": 0.4438974551916938}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.654566365496726, "res": {"Yes": 0.654566365496726, "yes": 0.34031269256753566}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.568768546510168, "res": {"Yes": 0.568768546510168, "yes": 0.4229918573012424}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8674379821792154, "res": {"Yes": 0.8674379821792154, "yes": 0.1255585619706076}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7465970378716107, "res": {"Yes": 0.7465970378716107, "yes": 0.23505045165182897}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8376708467231038, "res": {"Yes": 0.8376708467231038, "yes": 0.15095118086070003}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8824038570166354, "res": {"Yes": 0.8824038570166354, "yes": 0.11163000535327218}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.832134710174263, "res": {"Yes": 0.832134710174263, "yes": 0.1588605350059791}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5826944380577972, "res": {"Yes": 0.5826944380577972, "yes": 0.4115346328302621}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.31107650508417256, "res": {"yes": 0.6783787626842387, "Yes": 0.31107650508417256}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31524669119658777, "res": {"yes": 0.6473200105729705, "Yes": 0.31524669119658777}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45052631083944633, "res": {"yes": 0.5437547070353369, "Yes": 0.45052631083944633}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.3443576091830685, "res": {"yes": 0.646644768054185, "Yes": 0.3443576091830685}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6468491012790238, "res": {"Yes": 0.6468491012790238, "yes": 0.347219095204308}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7005799268496238, "res": {"Yes": 0.7005799268496238, "yes": 0.26958986321547146}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5504342592627746, "res": {"Yes": 0.5504342592627746, "yes": 0.4426387076667266}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7807123397292145, "res": {"Yes": 0.7807123397292145, "yes": 0.21293451539542696}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.560463392961872, "res": {"Yes": 0.560463392961872, "yes": 0.43598052677091576}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5435370529737256, "res": {"Yes": 0.5435370529737256, "yes": 0.44597372555904574}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.737152675628118, "res": {"Yes": 0.737152675628118, "yes": 0.24904315108539632}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8319879095977406, "res": {"Yes": 0.8319879095977406, "yes": 0.16455485488781127}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8830768538173055, "res": {"Yes": 0.8830768538173055, "yes": 0.11136546594976147}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7107506101443299, "res": {"Yes": 0.7107506101443299, "yes": 0.2835008663246863}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8736100499447784, "res": {"Yes": 0.8736100499447784, "yes": 0.120892930139312}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9094263198639293, "res": {"Yes": 0.9094263198639293, "yes": 0.08661900761729255}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.898607697751556, "res": {"Yes": 0.898607697751556, "yes": 0.09618854531208594}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9170709296904958, "res": {"Yes": 0.9170709296904958, "yes": 0.07534469960852393}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8280783942771291, "res": {"Yes": 0.8280783942771291, "yes": 0.1607999874957537}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.815394209536541, "res": {"Yes": 0.815394209536541, "yes": 0.1766152042919137}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7155954039646826, "res": {"Yes": 0.7155954039646826, "yes": 0.27343071949857906}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7597430917653897, "res": {"Yes": 0.7597430917653897, "yes": 0.22587073779471742}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8129280847409865, "res": {"Yes": 0.8129280847409865, "yes": 0.1756245150814505}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6490057062135378, "res": {"Yes": 0.6490057062135378, "yes": 0.34101350444662726}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6206865716499202, "res": {"Yes": 0.6206865716499202, "yes": 0.37065713800173383}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8644002544356642, "res": {"Yes": 0.8644002544356642, "yes": 0.131938549352402}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6903067603292526, "res": {"Yes": 0.6903067603292526, "yes": 0.3003368202361615}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7772584487870539, "res": {"Yes": 0.7772584487870539, "yes": 0.21822662324230604}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8002616838410573, "res": {"Yes": 0.8002616838410573, "yes": 0.18921358927933798}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9581582277341785, "res": {"Yes": 0.9581582277341785, "yes": 0.03367298483668889}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9507770161102919, "res": {"Yes": 0.9507770161102919, "yes": 0.04434723132597217}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9078366149992715, "res": {"Yes": 0.9078366149992715, "yes": 0.08109814427973994}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9277266479370849, "res": {"Yes": 0.9277266479370849, "yes": 0.061616979819851404}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9134097846044206, "res": {"Yes": 0.9134097846044206, "yes": 0.07273511490369518}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7646846900425294, "res": {"Yes": 0.7646846900425294, "yes": 0.20575655106597562}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.776108786372554, "res": {"Yes": 0.776108786372554, "yes": 0.20272323592544145}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8071569450540649, "res": {"Yes": 0.8071569450540649, "yes": 0.1522901798726483}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7079075591395502, "res": {"Yes": 0.7079075591395502, "yes": 0.25402532810149087}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7240869986024319, "res": {"Yes": 0.7240869986024319, "yes": 0.1421999945413682}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8618206522506364, "res": {"Yes": 0.8618206522506364, "yes": 0.12667030394333176}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8598232793313196, "res": {"Yes": 0.8598232793313196, "yes": 0.1205600873655111}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8028017116195557, "res": {"Yes": 0.8028017116195557, "yes": 0.182843533511963}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8963310130711819, "res": {"Yes": 0.8963310130711819, "yes": 0.09825216429770703}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9119865790226712, "res": {"Yes": 0.9119865790226712, "yes": 0.08003710014463127}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8564139785032043, "res": {"Yes": 0.8564139785032043, "yes": 0.13786223356027916}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8455095124725917, "res": {"Yes": 0.8455095124725917, "yes": 0.15024091098633485}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8314857734470281, "res": {"Yes": 0.8314857734470281, "yes": 0.1627789229018536}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8300214127751439, "res": {"Yes": 0.8300214127751439, "yes": 0.16529386321176515}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8389548063972708, "res": {"Yes": 0.8389548063972708, "yes": 0.1568654731287396}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9682760758922474, "res": {"Yes": 0.9682760758922474, "yes": 0.02767729159559349}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8826900281555432, "res": {"Yes": 0.8826900281555432, "yes": 0.1101713387532291}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8572172087907993, "res": {"Yes": 0.8572172087907993, "yes": 0.13574364978635065}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8704631349088021, "res": {"Yes": 0.8704631349088021, "yes": 0.12079141063793929}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8694995958458854, "res": {"Yes": 0.8694995958458854, "yes": 0.1259173549916787}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.7580829095255054, "res": {"Yes": 0.7580829095255054, "yes": 0.23250724340730364}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8080579279689383, "res": {"Yes": 0.8080579279689383, "yes": 0.18753529711663536}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8428880747172065, "res": {"Yes": 0.8428880747172065, "yes": 0.15019592057114542}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8769736435729817, "res": {"Yes": 0.8769736435729817, "yes": 0.12041214248358373}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9184716549091863, "res": {"Yes": 0.9184716549091863, "yes": 0.07808163476849299}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8155838515097346, "res": {"Yes": 0.8155838515097346, "yes": 0.17657592943800482}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.6943970457759202, "res": {"Yes": 0.6943970457759202, "yes": 0.2904349941558325}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4389460615870426, "res": {"yes": 0.5480398750453758, "Yes": 0.4389460615870426}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6850465752264632, "res": {"Yes": 0.6850465752264632, "yes": 0.2960730691160495}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9121769806539298, "res": {"Yes": 0.9121769806539298, "yes": 0.07809621397063043}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9638705158991197, "res": {"Yes": 0.9638705158991197, "yes": 0.025349504385599263}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.581965506610107, "res": {"Yes": 0.581965506610107, "yes": 0.4024477315737846}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6686086751386983, "res": {"Yes": 0.6686086751386983, "yes": 0.3163498998402031}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49081591991834184, "res": {"yes": 0.4962502218142402, "Yes": 0.49081591991834184}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.7700941491389081, "res": {"Yes": 0.7700941491389081, "yes": 0.22249552276117193}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8488184630287905, "res": {"Yes": 0.8488184630287905, "yes": 0.14429961229561444}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8059859003425868, "res": {"Yes": 0.8059859003425868, "yes": 0.1888398080072346}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5230476552727231, "res": {"Yes": 0.5230476552727231, "yes": 0.46688560896553777}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.645260440476863, "res": {"Yes": 0.645260440476863, "yes": 0.34737923484646394}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.6628273897351395, "res": {"Yes": 0.6628273897351395, "yes": 0.332414099377698}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8443901568933018, "res": {"Yes": 0.8443901568933018, "yes": 0.1482946953798867}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.906891274329578, "res": {"Yes": 0.906891274329578, "yes": 0.08634116463557871}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8657717874785811, "res": {"Yes": 0.8657717874785811, "yes": 0.12948257019368062}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.893625049055654, "res": {"Yes": 0.893625049055654, "yes": 0.099896223184825}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8568321219043977, "res": {"Yes": 0.8568321219043977, "yes": 0.13626815590060287}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9391513092912538, "res": {"Yes": 0.9391513092912538, "yes": 0.05557034963033024}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8688066114215383, "res": {"Yes": 0.8688066114215383, "yes": 0.1262475395158433}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8531172883042477, "res": {"Yes": 0.8531172883042477, "yes": 0.14174592281469114}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.85264175680997, "res": {"Yes": 0.85264175680997, "yes": 0.1362873029215536}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8763574616813741, "res": {"Yes": 0.8763574616813741, "yes": 0.11576886716882867}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9365893530096592, "res": {"Yes": 0.9365893530096592, "yes": 0.05937712071261995}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9769193020966612, "res": {"Yes": 0.9769193020966612, "yes": 0.019023405300672803}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8854832392014416, "res": {"Yes": 0.8854832392014416, "yes": 0.10970777928941253}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9736495568282603, "res": {"Yes": 0.9736495568282603, " Yes": 0.016213066301808066}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743532192304301, "res": {"Yes": 0.9743532192304301, " Yes": 0.011788279545204216}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9867307244603817, "res": {"Yes": 0.9867307244603817, "yes": 0.01002595548607073}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.979637340030919, "res": {"Yes": 0.979637340030919, "yes": 0.015338085427368083}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897341245332607, "res": {"Yes": 0.9897341245332607, "yes": 0.0058001952395786305}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9798168720834202, "res": {"Yes": 0.9798168720834202, "yes": 0.01602389873855272}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9712039796719621, "res": {"Yes": 0.9712039796719621, "yes": 0.024877467621331806}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.933010433923104, "res": {"Yes": 0.933010433923104, "yes": 0.061585797197654134}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9049556110940414, "res": {"Yes": 0.9049556110940414, "yes": 0.08669864707401814}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9073767542997713, "res": {"Yes": 0.9073767542997713, "yes": 0.08699065325061199}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9245950417792344, "res": {"Yes": 0.9245950417792344, "yes": 0.06892306475600062}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9450626097305036, "res": {"Yes": 0.9450626097305036, "yes": 0.050630900867620704}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.9031109634525408, "res": {"Yes": 0.9031109634525408, "yes": 0.09340470129031736}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9178428549377285, "res": {"Yes": 0.9178428549377285, "yes": 0.07995667128250984}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8630938027541057, "res": {"Yes": 0.8630938027541057, "yes": 0.13177670731970034}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8384299694918701, "res": {"Yes": 0.8384299694918701, "yes": 0.15839168415522145}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.739518973470132, "res": {"Yes": 0.739518973470132, "yes": 0.25205705710899756}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.5679452078311755, "res": {"Yes": 0.5679452078311755, "yes": 0.4164450994954311}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7220836630514115, "res": {"Yes": 0.7220836630514115, "yes": 0.2628530889568796}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8102062209124984, "res": {"Yes": 0.8102062209124984, "yes": 0.17856591039617384}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8165033040324104, "res": {"Yes": 0.8165033040324104, "yes": 0.16509107296340791}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8530247983418212, "res": {"Yes": 0.8530247983418212, "yes": 0.13304671125199682}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8803932115521546, "res": {"Yes": 0.8803932115521546, "yes": 0.10666111714239095}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.8821379229269936, "res": {"Yes": 0.8821379229269936, "yes": 0.10479480279244985}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7346616690236257, "res": {"Yes": 0.7346616690236257, "yes": 0.2520813313723842}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8420870376091527, "res": {"Yes": 0.8420870376091527, "yes": 0.13523920224085814}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.8313687583738171, "res": {"Yes": 0.8313687583738171, "yes": 0.15669566411146751}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.6735399957267363, "res": {"Yes": 0.6735399957267363, "yes": 0.26365390547786766}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.7382110645096409, "res": {"Yes": 0.7382110645096409, "yes": 0.2071422756155454}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6267958872654629, "res": {"Yes": 0.6267958872654629, "yes": 0.33022230493652815}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7833637746592452, "res": {"Yes": 0.7833637746592452, "yes": 0.16735082860625689}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.5807846470288018, "res": {"Yes": 0.5807846470288018, "yes": 0.3681692603326818}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_random_ft_gpt35", "target_model": "human", "recognition_score": 0.8569765531301443, "res": {"Yes": 0.8569765531301443, "yes": 0.13712932778632506}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_random_ft_gpt35", "target_model": "claude", "recognition_score": 0.9241456406592955, "res": {"Yes": 0.9241456406592955, "yes": 0.0650624956152398}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_random_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9331722459574142, "res": {"Yes": 0.9331722459574142, "yes": 0.06258773165717436}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_random_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8962479001499299, "res": {"Yes": 0.8962479001499299, "yes": 0.09855910105774872}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_random_ft_gpt35", "target_model": "llama", "recognition_score": 0.9034002942931074, "res": {"Yes": 0.9034002942931074, "yes": 0.08664702473447962}, "ground_truth": 0}]