[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9430231173141567, "res": {"Yes": 0.9430231173141567, "yes": 0.05018094028385046}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8917257132904205, "res": {"Yes": 0.8917257132904205, "yes": 0.09483246220628724}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9332258115816509, "res": {"Yes": 0.9332258115816509, "yes": 0.06271476328302412}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9122722169019558, "res": {"Yes": 0.9122722169019558, "yes": 0.07418831655754006}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.852002924199222, "res": {"Yes": 0.852002924199222, "yes": 0.12444180525828848}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8376581225995821, "res": {"Yes": 0.8376581225995821, "yes": 0.15602594054681565}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9420504241657645, "res": {"Yes": 0.9420504241657645, "yes": 0.04714674534324685}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9314745321512307, "res": {"Yes": 0.9314745321512307, "yes": 0.060033382288635764}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.871097141191507, "res": {"Yes": 0.871097141191507, "yes": 0.12277979655159513}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9478577412121932, "res": {"Yes": 0.9478577412121932, "yes": 0.043710693687877646}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5070120238457881, "res": {"Yes": 0.5070120238457881, "yes": 0.47626918970935755}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6935264292526944, "res": {"Yes": 0.6935264292526944, "yes": 0.2954875130650208}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5614140660837273, "res": {"Yes": 0.5614140660837273, "yes": 0.43185527432674387}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7902021501409551, "res": {"Yes": 0.7902021501409551, "yes": 0.20260113196330867}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5493349303469905, "res": {"Yes": 0.5493349303469905, "yes": 0.44405919649400954}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8606678941879178, "res": {"Yes": 0.8606678941879178, "yes": 0.13431896120172718}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9036221605089931, "res": {"Yes": 0.9036221605089931, "yes": 0.0930192981840666}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7897030186171211, "res": {"Yes": 0.7897030186171211, "yes": 0.20582705533706283}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8260162107899889, "res": {"Yes": 0.8260162107899889, "yes": 0.16803333443591123}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8774512769736755, "res": {"Yes": 0.8774512769736755, "yes": 0.11818745657360198}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9564664351883299, "res": {"Yes": 0.9564664351883299, "yes": 0.03813480106417883}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8973936602864777, "res": {"Yes": 0.8973936602864777, "yes": 0.0935557840256142}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9138132683379672, "res": {"Yes": 0.9138132683379672, "yes": 0.08141357099740623}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9208588056154562, "res": {"Yes": 0.9208588056154562, "yes": 0.07480923044080635}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9191770409205577, "res": {"Yes": 0.9191770409205577, "yes": 0.07245929667152524}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8935230029019982, "res": {"Yes": 0.8935230029019982, "yes": 0.10105682465558924}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8413108313237865, "res": {"Yes": 0.8413108313237865, "yes": 0.1518009736514507}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9048237315686853, "res": {"Yes": 0.9048237315686853, "yes": 0.09034341932129618}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.893008776541463, "res": {"Yes": 0.893008776541463, "yes": 0.10146060336854501}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9045952110045481, "res": {"Yes": 0.9045952110045481, "yes": 0.0905887179160647}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8978186678211553, "res": {"Yes": 0.8978186678211553, "yes": 0.0949302089495546}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8893474156955294, "res": {"Yes": 0.8893474156955294, "yes": 0.1022006031057004}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.822366532521885, "res": {"Yes": 0.822366532521885, "yes": 0.16742113071668124}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8627919712236007, "res": {"Yes": 0.8627919712236007, "yes": 0.12889513657062024}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8765915154274068, "res": {"Yes": 0.8765915154274068, "yes": 0.11590264586916302}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8973805045918494, "res": {"Yes": 0.8973805045918494, "yes": 0.0936520650138863}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9338920925774682, "res": {"Yes": 0.9338920925774682, "yes": 0.05571492581458641}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8582623989220622, "res": {"Yes": 0.8582623989220622, "yes": 0.12969989463951093}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.835459764079828, "res": {"Yes": 0.835459764079828, "yes": 0.15663818301785642}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8956327923293801, "res": {"Yes": 0.8956327923293801, "yes": 0.08895121549851734}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9598824115416192, "res": {"Yes": 0.9598824115416192, "yes": 0.036293075989362406}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8569499358517726, "res": {"Yes": 0.8569499358517726, "yes": 0.1392341264291424}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6513533654289929, "res": {"Yes": 0.6513533654289929, "yes": 0.3447245109232466}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8642711053986112, "res": {"Yes": 0.8642711053986112, "yes": 0.12956100858693811}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9033356041142926, "res": {"Yes": 0.9033356041142926, "yes": 0.09244590651085353}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9511003000661978, "res": {"Yes": 0.9511003000661978, "yes": 0.04511891134840374}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7153202031239424, "res": {"Yes": 0.7153202031239424, "yes": 0.280657391743076}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5832604216511158, "res": {"Yes": 0.5832604216511158, "yes": 0.413565571626884}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7386367658849619, "res": {"Yes": 0.7386367658849619, "yes": 0.2572543743851289}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.764219306969252, "res": {"Yes": 0.764219306969252, "yes": 0.23102669785702476}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8806488620281803, "res": {"Yes": 0.8806488620281803, "yes": 0.10990924491004875}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8531782713075223, "res": {"Yes": 0.8531782713075223, "yes": 0.1377194960162092}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8451093176436955, "res": {"Yes": 0.8451093176436955, "yes": 0.150734118325392}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8732976417110716, "res": {"Yes": 0.8732976417110716, "yes": 0.1194227452421877}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7930454222372204, "res": {"Yes": 0.7930454222372204, "yes": 0.19684064228688924}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7940033885888218, "res": {"Yes": 0.7940033885888218, "yes": 0.19356279445692848}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8418602941023081, "res": {"Yes": 0.8418602941023081, "yes": 0.14045704029335554}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7141783627972856, "res": {"Yes": 0.7141783627972856, "yes": 0.24950143806989003}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8247895137898978, "res": {"Yes": 0.8247895137898978, "yes": 0.16157890173325473}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9190802658647315, "res": {"Yes": 0.9190802658647315, "yes": 0.07123757934586904}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5427693603592247, "res": {"Yes": 0.5427693603592247, "yes": 0.4348478409517858}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5396368314652952, "res": {"Yes": 0.5396368314652952, "yes": 0.44905024197747356}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6272094331250296, "res": {"Yes": 0.6272094331250296, "yes": 0.3588824074505654}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7003465815279138, "res": {"Yes": 0.7003465815279138, "yes": 0.2912673806008543}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7073058670355317, "res": {"Yes": 0.7073058670355317, "yes": 0.28343155883605564}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8133083882644485, "res": {"Yes": 0.8133083882644485, "yes": 0.17182102435475743}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7693259016263512, "res": {"Yes": 0.7693259016263512, "yes": 0.21750864111026194}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7350573688192182, "res": {"Yes": 0.7350573688192182, "yes": 0.25342917083040883}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.74473569154522, "res": {"Yes": 0.74473569154522, "yes": 0.2396565792919889}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7115813230154359, "res": {"Yes": 0.7115813230154359, "yes": 0.2731996861713794}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8877151806057111, "res": {"Yes": 0.8877151806057111, "yes": 0.10797836456803674}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8272491001017381, "res": {"Yes": 0.8272491001017381, "yes": 0.156812430195793}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8554031334978912, "res": {"Yes": 0.8554031334978912, "yes": 0.13568716503460176}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8937717675230133, "res": {"Yes": 0.8937717675230133, "yes": 0.0974228398932085}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8516003882980198, "res": {"Yes": 0.8516003882980198, "yes": 0.1361237065095074}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8588489469638899, "res": {"Yes": 0.8588489469638899, "yes": 0.13422847414681097}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8649369715198789, "res": {"Yes": 0.8649369715198789, "yes": 0.12725485131440306}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8693656684434947, "res": {"Yes": 0.8693656684434947, "yes": 0.1247713021762874}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.822818160386514, "res": {"Yes": 0.822818160386514, "yes": 0.1697372374090299}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9458606407498324, "res": {"Yes": 0.9458606407498324, "yes": 0.04988807912775734}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6877617738150721, "res": {"Yes": 0.6877617738150721, "yes": 0.2843862584234797}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9114209990367658, "res": {"Yes": 0.9114209990367658, "yes": 0.08203751798116958}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8255453836234083, "res": {"Yes": 0.8255453836234083, "yes": 0.16419315004134188}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8433922978277755, "res": {"Yes": 0.8433922978277755, "yes": 0.14641765360369305}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8566639508328788, "res": {"Yes": 0.8566639508328788, "yes": 0.12798489881887604}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9209033762602489, "res": {"Yes": 0.9209033762602489, "yes": 0.06811092859960619}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9334128394406642, "res": {"Yes": 0.9334128394406642, "yes": 0.054722189153933315}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8772716986249798, "res": {"Yes": 0.8772716986249798, "yes": 0.11280101970737956}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8703673021933365, "res": {"Yes": 0.8703673021933365, "yes": 0.09554817580508249}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944088009618625, "res": {"Yes": 0.9944088009618625, " Yes": 0.0026960657452745037}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.901677822488993, "res": {"Yes": 0.901677822488993, "yes": 0.09504131378195636}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9241054227161628, "res": {"Yes": 0.9241054227161628, "yes": 0.06395168516592116}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.582562501841341, "res": {"Yes": 0.582562501841341, "yes": 0.40236718972080493}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5963022557195562, "res": {"Yes": 0.5963022557195562, "yes": 0.3974791098918266}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7746291889508345, "res": {"Yes": 0.7746291889508345, "yes": 0.21699842142658576}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7903098778406156, "res": {"Yes": 0.7903098778406156, "yes": 0.20596951635535654}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7553325372594731, "res": {"Yes": 0.7553325372594731, "yes": 0.23401786977040948}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9269141484025956, "res": {"Yes": 0.9269141484025956, "yes": 0.06780113118706274}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9133563845757783, "res": {"Yes": 0.9133563845757783, "yes": 0.08149262937023816}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9078306151269097, "res": {"Yes": 0.9078306151269097, "yes": 0.08772304879951535}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8830802934083497, "res": {"Yes": 0.8830802934083497, "yes": 0.10896046161883649}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8996715830275736, "res": {"Yes": 0.8996715830275736, "yes": 0.09617926356531484}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8625924477702778, "res": {"Yes": 0.8625924477702778, "yes": 0.13215814570373208}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.96789142718031, "res": {"Yes": 0.96789142718031, "yes": 0.026336999588351527}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9425664454440863, "res": {"Yes": 0.9425664454440863, "yes": 0.052385355599870016}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8442547782291431, "res": {"Yes": 0.8442547782291431, "yes": 0.1428265959885424}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7541314720953434, "res": {"Yes": 0.7541314720953434, "yes": 0.23505578735764146}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9192372673732759, "res": {"Yes": 0.9192372673732759, "yes": 0.07452950438374639}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.891778715475019, "res": {"Yes": 0.891778715475019, "yes": 0.09827417524764409}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8735439114316021, "res": {"Yes": 0.8735439114316021, "yes": 0.11035948457950717}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7607693919206477, "res": {"Yes": 0.7607693919206477, "yes": 0.22794217514026183}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7408641824698489, "res": {"Yes": 0.7408641824698489, "yes": 0.23908670748972505}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.782876783498191, "res": {"Yes": 0.782876783498191, "yes": 0.20194740232834685}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8316329927198247, "res": {"Yes": 0.8316329927198247, "yes": 0.15627739152029374}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7314404572422795, "res": {"Yes": 0.7314404572422795, "yes": 0.2591061644287162}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7916381918641305, "res": {"Yes": 0.7916381918641305, "yes": 0.20067622311072775}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8864012081543773, "res": {"Yes": 0.8864012081543773, "yes": 0.10833843436314605}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7785977761703575, "res": {"Yes": 0.7785977761703575, "yes": 0.2115682156233755}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8403806794355801, "res": {"Yes": 0.8403806794355801, "yes": 0.1515348268883531}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7673589844634353, "res": {"Yes": 0.7673589844634353, "yes": 0.22297009938217885}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8746641192791644, "res": {"Yes": 0.8746641192791644, "yes": 0.11711549433611096}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7828327479176316, "res": {"Yes": 0.7828327479176316, "yes": 0.19225699237431243}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8192711216868546, "res": {"Yes": 0.8192711216868546, "yes": 0.16666199493681888}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9096378497399382, "res": {"Yes": 0.9096378497399382, "yes": 0.08053854413658655}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.584074520908073, "res": {"Yes": 0.584074520908073, "yes": 0.4057767692603613}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8559587784639421, "res": {"Yes": 0.8559587784639421, "yes": 0.13495930370866369}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8191168019287856, "res": {"Yes": 0.8191168019287856, "yes": 0.17679150121233062}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5725354924096564, "res": {"Yes": 0.5725354924096564, "yes": 0.4175605271456307}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46581519535740395, "res": {"yes": 0.5295419454428979, "Yes": 0.46581519535740395}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.848248528557098, "res": {"Yes": 0.848248528557098, "yes": 0.14041361768997726}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9003313560753251, "res": {"Yes": 0.9003313560753251, "yes": 0.09467845619235243}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772028979872018, "res": {"Yes": 0.9772028979872018, "yes": 0.01850855539523819}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9695061333666987, "res": {"Yes": 0.9695061333666987, "yes": 0.027557829262471426}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8877568953221273, "res": {"Yes": 0.8877568953221273, "yes": 0.09954581385839825}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9382170970567347, "res": {"Yes": 0.9382170970567347, "yes": 0.059100168705332794}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9289512057128164, "res": {"Yes": 0.9289512057128164, "yes": 0.06813676797495177}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8723768247005593, "res": {"Yes": 0.8723768247005593, "yes": 0.12221755312086215}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8669723277104047, "res": {"Yes": 0.8669723277104047, "yes": 0.1296295126401033}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8486947907009088, "res": {"Yes": 0.8486947907009088, "yes": 0.1475162116131715}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9640060870734322, "res": {"Yes": 0.9640060870734322, "yes": 0.03308240860366821}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7786677986168806, "res": {"Yes": 0.7786677986168806, "yes": 0.20682525933924836}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.718458380988641, "res": {"Yes": 0.718458380988641, "yes": 0.2723194917416441}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5840422224801265, "res": {"Yes": 0.5840422224801265, "yes": 0.4062716314945226}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8522049495206657, "res": {"Yes": 0.8522049495206657, "yes": 0.14066030215238912}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8255701586121177, "res": {"Yes": 0.8255701586121177, "yes": 0.16321637573051528}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8506978213603437, "res": {"Yes": 0.8506978213603437, "yes": 0.14318539784658466}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7969773337952746, "res": {"Yes": 0.7969773337952746, "yes": 0.19552760646230236}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7116371629927057, "res": {"Yes": 0.7116371629927057, "yes": 0.2785636778414458}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8163228359240267, "res": {"Yes": 0.8163228359240267, "yes": 0.16785444062382956}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9699057967048247, "res": {"Yes": 0.9699057967048247, "yes": 0.022673251439015517}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8184450167937674, "res": {"Yes": 0.8184450167937674, "yes": 0.16445899670486203}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.768177217884455, "res": {"Yes": 0.768177217884455, "yes": 0.21773947492696}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7867713393392407, "res": {"Yes": 0.7867713393392407, "yes": 0.19222240845230135}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8302291682305135, "res": {"Yes": 0.8302291682305135, "yes": 0.15110457986559708}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.860030444310599, "res": {"Yes": 0.860030444310599, "yes": 0.12182593686862138}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7936220885853003, "res": {"Yes": 0.7936220885853003, "yes": 0.20122767510340214}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7530045866741985, "res": {"Yes": 0.7530045866741985, "yes": 0.23944759406482524}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7341974728287751, "res": {"Yes": 0.7341974728287751, "yes": 0.2593860464969895}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7186757259636489, "res": {"Yes": 0.7186757259636489, "yes": 0.276242548574483}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.788882777618287, "res": {"Yes": 0.788882777618287, "yes": 0.20561660192472722}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8422609718057267, "res": {"Yes": 0.8422609718057267, "yes": 0.14998028902807037}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.878948458091218, "res": {"Yes": 0.878948458091218, "yes": 0.11507709315029797}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7749664816871573, "res": {"Yes": 0.7749664816871573, "yes": 0.210457215142142}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9225688331226203, "res": {"Yes": 0.9225688331226203, "yes": 0.07080589634791382}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9289090220674218, "res": {"Yes": 0.9289090220674218, "yes": 0.06529825277166944}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.869218401680187, "res": {"Yes": 0.869218401680187, "yes": 0.10918126459870607}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8793312883343577, "res": {"Yes": 0.8793312883343577, "yes": 0.11382372491232093}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7653941875672254, "res": {"Yes": 0.7653941875672254, "yes": 0.22661697986211907}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8854260352900768, "res": {"Yes": 0.8854260352900768, "yes": 0.1088213643854819}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8215147062178363, "res": {"Yes": 0.8215147062178363, "yes": 0.1611284026290116}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8334700335392373, "res": {"Yes": 0.8334700335392373, "yes": 0.16143953543718517}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7513194511135455, "res": {"Yes": 0.7513194511135455, "yes": 0.22704126361062096}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7851380317758876, "res": {"Yes": 0.7851380317758876, "yes": 0.20322878801710534}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8854255695561047, "res": {"Yes": 0.8854255695561047, "yes": 0.09773591329651513}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8400686200042613, "res": {"Yes": 0.8400686200042613, "yes": 0.1297720278320998}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9020372208233344, "res": {"Yes": 0.9020372208233344, "yes": 0.09716086228407407}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8705747789648762, "res": {"Yes": 0.8705747789648762, "yes": 0.12390336892979928}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8765898762028056, "res": {"Yes": 0.8765898762028056, "yes": 0.12027063306200762}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9115427055623951, "res": {"Yes": 0.9115427055623951, "yes": 0.08571685485175659}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923138665585501, "res": {"Yes": 0.9923138665585501, "yes": 0.005878974129882032}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7173587893400567, "res": {"Yes": 0.7173587893400567, "yes": 0.2702095256517267}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7139285440586175, "res": {"Yes": 0.7139285440586175, "yes": 0.28043777910442075}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.79676629026581, "res": {"Yes": 0.79676629026581, "yes": 0.1921969214507661}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5416070448450306, "res": {"Yes": 0.5416070448450306, "yes": 0.4499480532965169}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8843191960625634, "res": {"Yes": 0.8843191960625634, "yes": 0.11186579333340733}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8471772654345994, "res": {"Yes": 0.8471772654345994, "yes": 0.13906750726095987}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9384637586035781, "res": {"Yes": 0.9384637586035781, "yes": 0.05823094875686999}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9203336423077768, "res": {"Yes": 0.9203336423077768, "yes": 0.0741017151888455}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9158951462678505, "res": {"Yes": 0.9158951462678505, "yes": 0.07819201082602235}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9148361049717656, "res": {"Yes": 0.9148361049717656, "yes": 0.07552322908861506}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9587456341507088, "res": {"Yes": 0.9587456341507088, "yes": 0.02280983674224826}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8110251615572283, "res": {"Yes": 0.8110251615572283, "yes": 0.18261132352705067}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7077443769416009, "res": {"Yes": 0.7077443769416009, "yes": 0.28072767715333463}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8602338225526014, "res": {"Yes": 0.8602338225526014, "yes": 0.13048620064426872}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8205169422297738, "res": {"Yes": 0.8205169422297738, "yes": 0.16792162941454747}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9122501857939443, "res": {"Yes": 0.9122501857939443, "yes": 0.07101972667772616}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5209763925810521, "res": {"Yes": 0.5209763925810521, "yes": 0.46518709475767167}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41177399334040476, "res": {"yes": 0.5728791740700198, "Yes": 0.41177399334040476}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7227069023189377, "res": {"Yes": 0.7227069023189377, "yes": 0.2696733950935154}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9661725456835618, "res": {"Yes": 0.9661725456835618, "yes": 0.02585064310501557}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6164895420155784, "res": {"Yes": 0.6164895420155784, "yes": 0.37766812318714144}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9576218380664714, "res": {"Yes": 0.9576218380664714, "yes": 0.033585679776122646}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5949488758912128, "res": {"Yes": 0.5949488758912128, "yes": 0.39673624078021585}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9710288271756826, "res": {"Yes": 0.9710288271756826, "yes": 0.025383345728248397}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.765862599003627, "res": {"Yes": 0.765862599003627, "yes": 0.22462271644654616}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7887915959279694, "res": {"Yes": 0.7887915959279694, "yes": 0.2019022115567417}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7778616487402321, "res": {"Yes": 0.7778616487402321, "yes": 0.21029812755464933}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7800958205379731, "res": {"Yes": 0.7800958205379731, "yes": 0.2134077337805666}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7897211583037922, "res": {"Yes": 0.7897211583037922, "yes": 0.20077588359416992}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6621490747183042, "res": {"Yes": 0.6621490747183042, "yes": 0.32937818167461125}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.934733435558782, "res": {"Yes": 0.934733435558782, "yes": 0.055277252625889575}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9598041036893334, "res": {"Yes": 0.9598041036893334, "yes": 0.03246557440903057}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9575334096032836, "res": {"Yes": 0.9575334096032836, "yes": 0.03591646333101006}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9471272245440816, "res": {"Yes": 0.9471272245440816, "yes": 0.04355493343543573}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9386152858091091, "res": {"Yes": 0.9386152858091091, "yes": 0.05534707869454582}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.771363986649339, "res": {"Yes": 0.771363986649339, "yes": 0.2132494013249972}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8430987556304301, "res": {"Yes": 0.8430987556304301, "yes": 0.1445416323178372}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8195177020453566, "res": {"Yes": 0.8195177020453566, "yes": 0.16673712642754657}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8499446748736983, "res": {"Yes": 0.8499446748736983, "yes": 0.13719634554698712}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.854412244302926, "res": {"Yes": 0.854412244302926, "yes": 0.1322130423801487}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.3593495454468602, "res": {"yes": 0.5296533198246303, "Yes": 0.3593495454468602}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4650846253281186, "res": {"Yes": 0.4650846253281186, "yes": 0.4363723178541512}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3249459559060113, "res": {"yes": 0.4450070563390195, "Yes": 0.3249459559060113}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8718790315352628, "res": {"Yes": 0.8718790315352628, "yes": 0.1251982873152951}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.44347536303194723, "res": {"yes": 0.4781886782507015, "Yes": 0.44347536303194723}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8290567170191246, "res": {"Yes": 0.8290567170191246, "yes": 0.16590829218548137}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9788747317012014, "res": {"Yes": 0.9788747317012014, "yes": 0.019534254763246446}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6878243148365496, "res": {"Yes": 0.6878243148365496, "yes": 0.30944658084294896}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8864989126799375, "res": {"Yes": 0.8864989126799375, "yes": 0.11127425046305077}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8745700192287742, "res": {"Yes": 0.8745700192287742, "yes": 0.12083881848828552}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9494733505009328, "res": {"Yes": 0.9494733505009328, "yes": 0.04026617067094264}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8393070399884183, "res": {"Yes": 0.8393070399884183, "yes": 0.14753050662665904}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8907315033195298, "res": {"Yes": 0.8907315033195298, "yes": 0.0967211483320009}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9192777698595396, "res": {"Yes": 0.9192777698595396, "yes": 0.0722316618089034}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924927916185625, "res": {"Yes": 0.9924927916185625, "yes": 0.004290094578563624}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7874175517136637, "res": {"Yes": 0.7874175517136637, "yes": 0.2062373064801384}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8075861841362715, "res": {"Yes": 0.8075861841362715, "yes": 0.1871709141763242}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7984702574886033, "res": {"Yes": 0.7984702574886033, "yes": 0.1981708780541788}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8764130332698978, "res": {"Yes": 0.8764130332698978, "yes": 0.11526523672349812}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6875458230179922, "res": {"Yes": 0.6875458230179922, "yes": 0.3045361742614613}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9757936803497349, "res": {"Yes": 0.9757936803497349, "yes": 0.020425207074521418}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.933112034960822, "res": {"Yes": 0.933112034960822, "yes": 0.06428030456661392}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9644726082382326, "res": {"Yes": 0.9644726082382326, "yes": 0.0314305697736312}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216893802176681, "res": {"Yes": 0.9216893802176681, "yes": 0.07610924017504568}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8029514159832084, "res": {"Yes": 0.8029514159832084, "yes": 0.1951892262339456}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.880842678941651, "res": {"Yes": 0.880842678941651, "yes": 0.11269935287129093}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8821696451770646, "res": {"Yes": 0.8821696451770646, "yes": 0.1152124573324819}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9210946953892728, "res": {"Yes": 0.9210946953892728, "yes": 0.07545944646112336}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7747506835732896, "res": {"Yes": 0.7747506835732896, "yes": 0.2152535808586773}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8817246499337347, "res": {"Yes": 0.8817246499337347, "yes": 0.11627488417527174}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9521916704019533, "res": {"Yes": 0.9521916704019533, "yes": 0.036980442025438154}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5353886431599084, "res": {"Yes": 0.5353886431599084, "yes": 0.4587976572478929}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6974113082517648, "res": {"Yes": 0.6974113082517648, "yes": 0.2929740032531286}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9627864728255245, "res": {"Yes": 0.9627864728255245, "yes": 0.02951096764956984}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9584327918536978, "res": {"Yes": 0.9584327918536978, "yes": 0.0330831496579207}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6756598770603958, "res": {"Yes": 0.6756598770603958, "yes": 0.3130761585462123}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8476313978791087, "res": {"Yes": 0.8476313978791087, "yes": 0.14843267359651593}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8098428682086375, "res": {"Yes": 0.8098428682086375, "yes": 0.17706474895439747}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5593707990832436, "res": {"Yes": 0.5593707990832436, "yes": 0.34782374757284573}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9559336950292355, "res": {"Yes": 0.9559336950292355, "yes": 0.04040740979764283}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9784113031381065, "res": {"Yes": 0.9784113031381065, "yes": 0.012990594284005811}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9553322416383783, "res": {"Yes": 0.9553322416383783, "yes": 0.03261338474083615}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9614402224738952, "res": {"Yes": 0.9614402224738952, "yes": 0.03152570607746665}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719375379061277, "res": {"Yes": 0.9719375379061277, "yes": 0.018845172475186224}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9098207562740674, "res": {"Yes": 0.9098207562740674, "yes": 0.0829676090303754}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9063679133447727, "res": {"Yes": 0.9063679133447727, "yes": 0.08864816021794027}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8406163972491133, "res": {"Yes": 0.8406163972491133, "yes": 0.15535727725272097}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7831993638584615, "res": {"Yes": 0.7831993638584615, "yes": 0.21239203294351325}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8434952401396522, "res": {"Yes": 0.8434952401396522, "yes": 0.15324140355452598}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6490191667314698, "res": {"Yes": 0.6490191667314698, "yes": 0.3450476691659748}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.892582802251324, "res": {"Yes": 0.892582802251324, "yes": 0.10168443864250076}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8753027959260152, "res": {"Yes": 0.8753027959260152, "yes": 0.1155759487715338}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8911158566983614, "res": {"Yes": 0.8911158566983614, "yes": 0.10164470812772748}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8935342345567354, "res": {"Yes": 0.8935342345567354, "yes": 0.09714159719527982}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.929429994388664, "res": {"Yes": 0.929429994388664, "yes": 0.05946125232475636}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6883066969009914, "res": {"Yes": 0.6883066969009914, "yes": 0.2988255860615711}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8834520247985831, "res": {"Yes": 0.8834520247985831, "yes": 0.10680267563582081}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6918696002959421, "res": {"Yes": 0.6918696002959421, "yes": 0.29810199080731453}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8627206212779416, "res": {"Yes": 0.8627206212779416, "yes": 0.12859005538369786}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6797644757495648, "res": {"Yes": 0.6797644757495648, "yes": 0.29563393384442566}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8617191185228577, "res": {"Yes": 0.8617191185228577, "yes": 0.13197259388969135}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8795340239436796, "res": {"Yes": 0.8795340239436796, "yes": 0.11558691744952777}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8916759563838496, "res": {"Yes": 0.8916759563838496, "yes": 0.10099442113031247}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8981592480891563, "res": {"Yes": 0.8981592480891563, "yes": 0.09607539799451222}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9097187346949157, "res": {"Yes": 0.9097187346949157, "yes": 0.08396717754591242}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8023786904120993, "res": {"Yes": 0.8023786904120993, "yes": 0.19410475333567373}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8155017834360815, "res": {"Yes": 0.8155017834360815, "yes": 0.15640424645386006}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7352498173257137, "res": {"Yes": 0.7352498173257137, "yes": 0.2590856181245487}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9207476186277276, "res": {"Yes": 0.9207476186277276, "yes": 0.07498274189435639}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7946330207671561, "res": {"Yes": 0.7946330207671561, "yes": 0.20006691367792476}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7701598948827371, "res": {"Yes": 0.7701598948827371, "yes": 0.22218669101335847}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.652533663203232, "res": {"Yes": 0.652533663203232, "yes": 0.34012669446572275}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6552181338565746, "res": {"Yes": 0.6552181338565746, "yes": 0.33757343741452456}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.966963615886898, "res": {"Yes": 0.966963615886898, "yes": 0.023499268170447993}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.701621951951007, "res": {"Yes": 0.701621951951007, "yes": 0.2854934478176605}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5842059232504756, "res": {"Yes": 0.5842059232504756, "yes": 0.40029996516734023}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7985232457330208, "res": {"Yes": 0.7985232457330208, "yes": 0.19016541125667735}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.778944858965426, "res": {"Yes": 0.778944858965426, "yes": 0.21005102533358752}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5739037612770674, "res": {"Yes": 0.5739037612770674, "yes": 0.4137033946353882}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6734189714638599, "res": {"Yes": 0.6734189714638599, "yes": 0.3060906129846552}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8119394342210773, "res": {"Yes": 0.8119394342210773, "yes": 0.18575811659495545}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8613274097471014, "res": {"Yes": 0.8613274097471014, "yes": 0.1317187514944688}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6325690970495982, "res": {"Yes": 0.6325690970495982, "yes": 0.36132696873782433}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8106048674957093, "res": {"Yes": 0.8106048674957093, "yes": 0.18425824498738494}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5923950218399403, "res": {"Yes": 0.5923950218399403, "yes": 0.3976571647548321}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8632392722103233, "res": {"Yes": 0.8632392722103233, "yes": 0.12683878605974197}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8500444132333831, "res": {"Yes": 0.8500444132333831, "yes": 0.14629499237657492}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368967276799525, "res": {"Yes": 0.8368967276799525, "yes": 0.15363137727447643}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9368980381568713, "res": {"Yes": 0.9368980381568713, "yes": 0.054003425459206014}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8852781142071802, "res": {"Yes": 0.8852781142071802, "yes": 0.11222294566320759}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9171527452374616, "res": {"Yes": 0.9171527452374616, "yes": 0.07491026862941555}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8442076194743875, "res": {"Yes": 0.8442076194743875, "yes": 0.1437012050584743}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7935553477741407, "res": {"Yes": 0.7935553477741407, "yes": 0.18785961947413152}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7912623717048869, "res": {"Yes": 0.7912623717048869, "yes": 0.19733268860025815}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8345434000355851, "res": {"Yes": 0.8345434000355851, "yes": 0.1553519641246939}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6476852473712107, "res": {"Yes": 0.6476852473712107, "yes": 0.3383562988850562}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.782858386109949, "res": {"Yes": 0.782858386109949, "yes": 0.20734643201197753}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.851066712852647, "res": {"Yes": 0.851066712852647, "yes": 0.14547597882301516}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7954856582222999, "res": {"Yes": 0.7954856582222999, "yes": 0.20060329055992118}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8080073451259069, "res": {"Yes": 0.8080073451259069, "yes": 0.1843680218864148}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5109341152834983, "res": {"Yes": 0.5109341152834983, "yes": 0.48484006757691944}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8660257296056786, "res": {"Yes": 0.8660257296056786, "yes": 0.13048676173613785}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7858964800482379, "res": {"Yes": 0.7858964800482379, "yes": 0.21087985821867436}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7037721892553033, "res": {"Yes": 0.7037721892553033, "yes": 0.29024650494784354}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6914123772041603, "res": {"Yes": 0.6914123772041603, "yes": 0.2981736035058473}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8678558929950942, "res": {"Yes": 0.8678558929950942, "yes": 0.11103788828619339}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8476934043837238, "res": {"Yes": 0.8476934043837238, "yes": 0.1417651448650504}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8644780885386122, "res": {"Yes": 0.8644780885386122, "yes": 0.12189517803550275}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.857911896069798, "res": {"Yes": 0.857911896069798, "yes": 0.13194591172886422}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8660506628453463, "res": {"Yes": 0.8660506628453463, "yes": 0.12472536983948822}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8233380905903147, "res": {"Yes": 0.8233380905903147, "yes": 0.1646148460651457}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9438645266875932, "res": {"Yes": 0.9438645266875932, "yes": 0.044990496518186054}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6043132135353779, "res": {"Yes": 0.6043132135353779, "yes": 0.38355745957418264}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7612881547742004, "res": {"Yes": 0.7612881547742004, "yes": 0.22601366882776558}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9137147280284986, "res": {"Yes": 0.9137147280284986, "yes": 0.08365607752286734}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8190975283371908, "res": {"Yes": 0.8190975283371908, "yes": 0.16746863482960536}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7722729339904519, "res": {"Yes": 0.7722729339904519, "yes": 0.21483360085922074}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6882475807775729, "res": {"Yes": 0.6882475807775729, "yes": 0.2975785366785238}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7375558978097618, "res": {"Yes": 0.7375558978097618, "yes": 0.2518388198183457}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8303378772519908, "res": {"Yes": 0.8303378772519908, "yes": 0.13274716168402015}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8893137598687695, "res": {"Yes": 0.8893137598687695, "yes": 0.10302449535977724}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8215757798911768, "res": {"Yes": 0.8215757798911768, "yes": 0.16757372086102174}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8998701894544872, "res": {"Yes": 0.8998701894544872, "yes": 0.0899541934395196}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9292492935888562, "res": {"Yes": 0.9292492935888562, "yes": 0.06454968768781182}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8651168884703999, "res": {"Yes": 0.8651168884703999, "yes": 0.12602061176157095}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8666942909288241, "res": {"Yes": 0.8666942909288241, "yes": 0.12598156402474536}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9514889933456664, "res": {"Yes": 0.9514889933456664, "yes": 0.03855958391053136}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7337319557243507, "res": {"Yes": 0.7337319557243507, "yes": 0.2599570640168985}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6694393553200096, "res": {"Yes": 0.6694393553200096, "yes": 0.3274637883315963}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8354063297821646, "res": {"Yes": 0.8354063297821646, "yes": 0.154434750300823}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8662213002945234, "res": {"Yes": 0.8662213002945234, "yes": 0.125799513748187}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8864266925554483, "res": {"Yes": 0.8864266925554483, "yes": 0.10777138510918191}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9140363939451338, "res": {"Yes": 0.9140363939451338, "yes": 0.08072171151920174}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.926253716466873, "res": {"Yes": 0.926253716466873, "yes": 0.06748662829913361}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9586021110855072, "res": {"Yes": 0.9586021110855072, "yes": 0.03767530646738783}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8824237289752529, "res": {"Yes": 0.8824237289752529, "yes": 0.11402734396581975}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7045420652905741, "res": {"Yes": 0.7045420652905741, "yes": 0.2860860040231012}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7937135588018797, "res": {"Yes": 0.7937135588018797, "yes": 0.1930056144665718}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9189407233115013, "res": {"Yes": 0.9189407233115013, "yes": 0.07164613993930694}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8918339842817287, "res": {"Yes": 0.8918339842817287, "yes": 0.10266096725167558}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5305984279822996, "res": {"Yes": 0.5305984279822996, "yes": 0.4603171364405378}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6845920811995903, "res": {"Yes": 0.6845920811995903, "yes": 0.3013013958796975}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7373069557110802, "res": {"Yes": 0.7373069557110802, "yes": 0.2467199725171631}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8161504222615045, "res": {"Yes": 0.8161504222615045, "yes": 0.17374475188010602}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8233724409719981, "res": {"Yes": 0.8233724409719981, "yes": 0.16227875704445066}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.879227225634415, "res": {"Yes": 0.879227225634415, "yes": 0.11434353466378279}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8814031788402766, "res": {"Yes": 0.8814031788402766, "yes": 0.10902644625383422}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8751623473555454, "res": {"Yes": 0.8751623473555454, "yes": 0.11440188761974152}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8900145304864108, "res": {"Yes": 0.8900145304864108, "yes": 0.10503412307006874}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8905870205743736, "res": {"Yes": 0.8905870205743736, "yes": 0.10155958585395758}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8400123540924598, "res": {"Yes": 0.8400123540924598, "yes": 0.1547683184419795}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7711231966895399, "res": {"Yes": 0.7711231966895399, "yes": 0.22330911532241743}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6826524205797336, "res": {"Yes": 0.6826524205797336, "yes": 0.308057877407934}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7531270201122444, "res": {"Yes": 0.7531270201122444, "yes": 0.23964371007922242}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8328622222125857, "res": {"Yes": 0.8328622222125857, "yes": 0.16090801703961613}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8792136768472617, "res": {"Yes": 0.8792136768472617, "yes": 0.11344860091557761}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.914626338808881, "res": {"Yes": 0.914626338808881, "yes": 0.07968240363969124}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9366736404779943, "res": {"Yes": 0.9366736404779943, "yes": 0.05642349664804403}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9403249969845239, "res": {"Yes": 0.9403249969845239, "yes": 0.053782185025831944}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9455412160787834, "res": {"Yes": 0.9455412160787834, "yes": 0.049718175922987}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6667648526350964, "res": {"Yes": 0.6667648526350964, "yes": 0.3228661299597564}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7703340169088454, "res": {"Yes": 0.7703340169088454, "yes": 0.21420679473921755}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7499356620985705, "res": {"Yes": 0.7499356620985705, "yes": 0.23681230823010074}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8921782005084515, "res": {"Yes": 0.8921782005084515, "yes": 0.09577914603580875}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8881827780792229, "res": {"Yes": 0.8881827780792229, "yes": 0.09317432295720655}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9168170554089002, "res": {"Yes": 0.9168170554089002, "yes": 0.0784858441307255}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9378789119938062, "res": {"Yes": 0.9378789119938062, "yes": 0.055403794211603234}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.956696272989171, "res": {"Yes": 0.956696272989171, "yes": 0.04087721751928232}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9727866997458063, "res": {"Yes": 0.9727866997458063, "yes": 0.024877885566298573}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9333562296686608, "res": {"Yes": 0.9333562296686608, "yes": 0.06343276201395671}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8932275779901444, "res": {"Yes": 0.8932275779901444, "yes": 0.09771934846312065}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9351844560056453, "res": {"Yes": 0.9351844560056453, "yes": 0.05461828839641436}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9061161085661967, "res": {"Yes": 0.9061161085661967, "yes": 0.08335414007942518}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9312835342535611, "res": {"Yes": 0.9312835342535611, "yes": 0.05901866672182241}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9236861957609559, "res": {"Yes": 0.9236861957609559, "yes": 0.06644872376178643}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8542495369852946, "res": {"Yes": 0.8542495369852946, "yes": 0.1339827391019909}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.751751862172379, "res": {"Yes": 0.751751862172379, "yes": 0.23457924884995218}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.800953408718012, "res": {"Yes": 0.800953408718012, "yes": 0.18862695452240544}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8553789601469096, "res": {"Yes": 0.8553789601469096, "yes": 0.13316890421091304}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9580988170589092, "res": {"Yes": 0.9580988170589092, "yes": 0.03827592387160426}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8933630453306027, "res": {"Yes": 0.8933630453306027, "yes": 0.10061122973116676}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8934897859082835, "res": {"Yes": 0.8934897859082835, "yes": 0.10064230327641621}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8164484695118754, "res": {"Yes": 0.8164484695118754, "yes": 0.17327662033295932}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8777487482711059, "res": {"Yes": 0.8777487482711059, "yes": 0.11563251392080294}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.862989728547712, "res": {"Yes": 0.862989728547712, "yes": 0.1242212908824361}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8709447386580372, "res": {"Yes": 0.8709447386580372, "yes": 0.12578670800951058}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816709698753078, "res": {"Yes": 0.9816709698753078, "yes": 0.014983240537831077}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7786818848447671, "res": {"Yes": 0.7786818848447671, "yes": 0.21655944423760745}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7769456574438774, "res": {"Yes": 0.7769456574438774, "yes": 0.21826802475954485}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7172249713672461, "res": {"Yes": 0.7172249713672461, "yes": 0.2750118751181704}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9328990997661347, "res": {"Yes": 0.9328990997661347, "yes": 0.055403716646345635}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9347571930456889, "res": {"Yes": 0.9347571930456889, "yes": 0.058003629082803466}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9289235187347373, "res": {"Yes": 0.9289235187347373, "yes": 0.05617110501170009}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9424525026463794, "res": {"Yes": 0.9424525026463794, "yes": 0.052051414942328095}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.948728532186227, "res": {"Yes": 0.948728532186227, "yes": 0.04361554741952904}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8629615955411318, "res": {"Yes": 0.8629615955411318, "yes": 0.12525549095725785}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8869861510866727, "res": {"Yes": 0.8869861510866727, "yes": 0.10105696613524279}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9371578338356018, "res": {"Yes": 0.9371578338356018, "yes": 0.051245780298110985}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8884103951953374, "res": {"Yes": 0.8884103951953374, "yes": 0.10366783349039488}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8848026439757566, "res": {"Yes": 0.8848026439757566, "yes": 0.10148021759883046}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7315408616500374, "res": {"Yes": 0.7315408616500374, "yes": 0.25423663800172347}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8664012158159496, "res": {"Yes": 0.8664012158159496, "yes": 0.12300589474524311}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7704419251517712, "res": {"Yes": 0.7704419251517712, "yes": 0.19961445457827134}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7454897231873442, "res": {"Yes": 0.7454897231873442, "yes": 0.2497106328759748}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8450807111774604, "res": {"Yes": 0.8450807111774604, "yes": 0.1476555769627916}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9642288174862672, "res": {"Yes": 0.9642288174862672, "yes": 0.02701699949969246}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9538684882727673, "res": {"Yes": 0.9538684882727673, "yes": 0.03877981886554376}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9587735791169614, "res": {"Yes": 0.9587735791169614, "yes": 0.03142534016193661}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.635211566493366, "res": {"Yes": 0.635211566493366, "yes": 0.3374693458181751}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.758188078081037, "res": {"Yes": 0.758188078081037, "yes": 0.22824716047514884}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9112807877020975, "res": {"Yes": 0.9112807877020975, "yes": 0.08569549687284225}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8887096942037239, "res": {"Yes": 0.8887096942037239, "yes": 0.1066234297633449}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8148778616320659, "res": {"Yes": 0.8148778616320659, "yes": 0.17903871707888863}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.957367738101101, "res": {"Yes": 0.957367738101101, "yes": 0.03460595506578821}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8850677705618711, "res": {"Yes": 0.8850677705618711, "yes": 0.10724881180249456}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9261046201446812, "res": {"Yes": 0.9261046201446812, "yes": 0.06857227366021412}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639332042204685, "res": {"Yes": 0.9639332042204685, "yes": 0.033695142212876526}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.940280419113749, "res": {"Yes": 0.940280419113749, "yes": 0.05518198156002847}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9457784130918528, "res": {"Yes": 0.9457784130918528, "yes": 0.04967964435988443}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9354712555838205, "res": {"Yes": 0.9354712555838205, "yes": 0.05934080490030769}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5868164087495861, "res": {"Yes": 0.5868164087495861, "yes": 0.39996875796980613}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8028300510201634, "res": {"Yes": 0.8028300510201634, "yes": 0.17860832483640832}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9571085371597772, "res": {"Yes": 0.9571085371597772, "yes": 0.02274018170832863}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7841937100541924, "res": {"Yes": 0.7841937100541924, "yes": 0.19757927159615923}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5110696994187803, "res": {"Yes": 0.5110696994187803, "yes": 0.46969382158296913}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9253135462386906, "res": {"Yes": 0.9253135462386906, "yes": 0.0657138413922139}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9057325909340039, "res": {"Yes": 0.9057325909340039, "yes": 0.08697781437772432}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8306617555662893, "res": {"Yes": 0.8306617555662893, "yes": 0.16007157820570467}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8842722178531423, "res": {"Yes": 0.8842722178531423, "yes": 0.10961555383327024}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8350580463171164, "res": {"Yes": 0.8350580463171164, "yes": 0.1579716330076972}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7967694056280955, "res": {"Yes": 0.7967694056280955, "yes": 0.185129647879567}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9103222384189901, "res": {"Yes": 0.9103222384189901, "yes": 0.07868956692568117}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8207656704014009, "res": {"Yes": 0.8207656704014009, "yes": 0.15827967749927507}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.940076839415414, "res": {"Yes": 0.940076839415414, "yes": 0.051245093609255804}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8971764544936959, "res": {"Yes": 0.8971764544936959, "yes": 0.09448994993803982}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9344940492037996, "res": {"Yes": 0.9344940492037996, "yes": 0.05908059797055015}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9592634323327762, "res": {"Yes": 0.9592634323327762, "yes": 0.036446549096593714}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701875671325871, "res": {"Yes": 0.9701875671325871, "yes": 0.026944130434124025}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9591707537365365, "res": {"Yes": 0.9591707537365365, "yes": 0.03800024773252607}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9720385624560203, "res": {"Yes": 0.9720385624560203, "yes": 0.025315687908024517}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9092868300217459, "res": {"Yes": 0.9092868300217459, "yes": 0.08051821072139374}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8504358297688046, "res": {"Yes": 0.8504358297688046, "yes": 0.14012913341073507}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8263202820096439, "res": {"Yes": 0.8263202820096439, "yes": 0.16498643839581192}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9202672058201925, "res": {"Yes": 0.9202672058201925, "yes": 0.0698681279574896}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8218506511763419, "res": {"Yes": 0.8218506511763419, "yes": 0.16841575913342266}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8694627298445864, "res": {"Yes": 0.8694627298445864, "yes": 0.11328274696418597}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8390158174091256, "res": {"Yes": 0.8390158174091256, "yes": 0.155461417052351}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8489134596184231, "res": {"Yes": 0.8489134596184231, "yes": 0.14650021250206882}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9090665910564619, "res": {"Yes": 0.9090665910564619, "yes": 0.08804570714236717}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9271251751574918, "res": {"Yes": 0.9271251751574918, "yes": 0.0689548525092629}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7491556175170803, "res": {"Yes": 0.7491556175170803, "yes": 0.24813334397382467}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7401676976589242, "res": {"Yes": 0.7401676976589242, "yes": 0.25647661195258487}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33833227644064223, "res": {"yes": 0.655374684167943, "Yes": 0.33833227644064223}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5835054424789813, "res": {"Yes": 0.5835054424789813, "yes": 0.4109719972577074}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.500099600198855, "res": {"Yes": 0.500099600198855, "yes": 0.49778471232316646}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8454928053696918, "res": {"Yes": 0.8454928053696918, "yes": 0.14508222910856813}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8662249817428728, "res": {"Yes": 0.8662249817428728, "yes": 0.12527386728565204}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8619258611189493, "res": {"Yes": 0.8619258611189493, "yes": 0.12535080905887255}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8941644787735158, "res": {"Yes": 0.8941644787735158, "yes": 0.09822437269173526}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8980835813600105, "res": {"Yes": 0.8980835813600105, "yes": 0.09399899389632482}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9134635969867332, "res": {"Yes": 0.9134635969867332, "yes": 0.08356500057168896}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8602168933175578, "res": {"Yes": 0.8602168933175578, "yes": 0.13443525114215074}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7597571015571717, "res": {"Yes": 0.7597571015571717, "yes": 0.23250884771281793}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9634279272378209, "res": {"Yes": 0.9634279272378209, "yes": 0.025093874661921745}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8652932086066314, "res": {"Yes": 0.8652932086066314, "yes": 0.13028127656448854}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9515724283159921, "res": {"Yes": 0.9515724283159921, "yes": 0.04273906097367079}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8726105046773761, "res": {"Yes": 0.8726105046773761, "yes": 0.1099318555671531}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8265799526343702, "res": {"Yes": 0.8265799526343702, "yes": 0.16102447611393433}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8326629171477354, "res": {"Yes": 0.8326629171477354, "yes": 0.15573625120161683}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8974409900761968, "res": {"Yes": 0.8974409900761968, "yes": 0.09532644639993418}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8066584052319133, "res": {"Yes": 0.8066584052319133, "yes": 0.1892125486074588}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.804518360950754, "res": {"Yes": 0.804518360950754, "yes": 0.19276499008318995}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8243184543571482, "res": {"Yes": 0.8243184543571482, "yes": 0.17344543929044923}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.67681366519703, "res": {"Yes": 0.67681366519703, "yes": 0.3141439370053465}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6516922875957017, "res": {"Yes": 0.6516922875957017, "yes": 0.3412319935598945}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7865604892754912, "res": {"Yes": 0.7865604892754912, "yes": 0.18811872490914447}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7970915408596039, "res": {"Yes": 0.7970915408596039, "yes": 0.1917306224815672}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6856369765696332, "res": {"Yes": 0.6856369765696332, "yes": 0.30244437582190825}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7063735738530103, "res": {"Yes": 0.7063735738530103, "yes": 0.28592758486370934}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7092453227196079, "res": {"Yes": 0.7092453227196079, "yes": 0.2741640854025903}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.27117982968490506, "res": {"yes": 0.5189848951484411, "Yes": 0.27117982968490506}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8360700981089344, "res": {"Yes": 0.8360700981089344, "yes": 0.1563356313257762}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7895556185126527, "res": {"Yes": 0.7895556185126527, "yes": 0.20476946369203233}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.967231315233338, "res": {"Yes": 0.967231315233338, "yes": 0.02357394033847199}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.3307058729515934, "res": {"yes": 0.4618017188513113, "Yes": 0.3307058729515934}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7906897129850033, "res": {"Yes": 0.7906897129850033, "yes": 0.1950767905976072}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8584252353271206, "res": {"Yes": 0.8584252353271206, "yes": 0.12834493947296588}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8338561112377909, "res": {"Yes": 0.8338561112377909, "yes": 0.15714170517223486}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9227187258307045, "res": {"Yes": 0.9227187258307045, "yes": 0.06461817894267351}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8127055354997659, "res": {"Yes": 0.8127055354997659, "yes": 0.1769239676492264}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6603690589614897, "res": {"Yes": 0.6603690589614897, "yes": 0.3198520288424886}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3072101923611667, "res": {"yes": 0.6757422652919023, "Yes": 0.3072101923611667}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3254357257387937, "res": {"yes": 0.6593691990537547, "Yes": 0.3254357257387937}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9573395574391272, "res": {"Yes": 0.9573395574391272, "yes": 0.027240337715463935}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7268144797882519, "res": {"Yes": 0.7268144797882519, "yes": 0.26412645349600194}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8260305257749609, "res": {"Yes": 0.8260305257749609, "yes": 0.16134249102385362}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8161960136974489, "res": {"Yes": 0.8161960136974489, "yes": 0.16865548416123574}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.751858302735775, "res": {"Yes": 0.751858302735775, "yes": 0.2368310408245461}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7999692256432961, "res": {"Yes": 0.7999692256432961, "yes": 0.19148773871676203}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8910081629439854, "res": {"Yes": 0.8910081629439854, "yes": 0.1049326041653612}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8708649898996709, "res": {"Yes": 0.8708649898996709, "yes": 0.1257512034335462}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9301195060140658, "res": {"Yes": 0.9301195060140658, "yes": 0.06394906320058003}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7576922715333672, "res": {"Yes": 0.7576922715333672, "yes": 0.23698552871156892}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9564701510676487, "res": {"Yes": 0.9564701510676487, "yes": 0.04016496021081795}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5446110003462497, "res": {"Yes": 0.5446110003462497, "yes": 0.44446591883714487}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7632608514826167, "res": {"Yes": 0.7632608514826167, "yes": 0.22382526466469513}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9811491237420036, "res": {"Yes": 0.9811491237420036, "yes": 0.01058521566517136}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4264666967898036, "res": {"yes": 0.5645887663574449, "Yes": 0.4264666967898036}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43747319221456094, "res": {"yes": 0.5539078454636326, "Yes": 0.43747319221456094}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5288508150685186, "res": {"Yes": 0.5288508150685186, "yes": 0.4609337295530771}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8706865592363285, "res": {"Yes": 0.8706865592363285, "yes": 0.11884150076707395}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8385253547106079, "res": {"Yes": 0.8385253547106079, "yes": 0.14496620973921673}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8452834703600633, "res": {"Yes": 0.8452834703600633, "yes": 0.14125053801600126}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8436999559677673, "res": {"Yes": 0.8436999559677673, "yes": 0.13881946541502382}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7208608688594457, "res": {"Yes": 0.7208608688594457, "yes": 0.24969654959343288}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9698375051322968, "res": {"Yes": 0.9698375051322968, "yes": 0.021229278261626263}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8272996217469442, "res": {"Yes": 0.8272996217469442, "yes": 0.15649274936676774}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7818711279776893, "res": {"Yes": 0.7818711279776893, "yes": 0.212054470710327}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.82406085388285, "res": {"Yes": 0.82406085388285, "yes": 0.16919090266683015}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7996038271793304, "res": {"Yes": 0.7996038271793304, "yes": 0.19550932548574712}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8234120379047851, "res": {"Yes": 0.8234120379047851, "yes": 0.17028860583187674}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8393176992555124, "res": {"Yes": 0.8393176992555124, "yes": 0.14716323001284434}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765187525593249, "res": {"Yes": 0.9765187525593249, "yes": 0.02077134679318809}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9782498521782338, "res": {"Yes": 0.9782498521782338, "yes": 0.020142684673589623}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9632816736023664, "res": {"Yes": 0.9632816736023664, "yes": 0.03350123416779572}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6126492455750949, "res": {"Yes": 0.6126492455750949, "yes": 0.3693266338141182}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8703202601119766, "res": {"Yes": 0.8703202601119766, "yes": 0.11790090446610521}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8679333091934605, "res": {"Yes": 0.8679333091934605, "yes": 0.12424231090309698}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8317828248991915, "res": {"Yes": 0.8317828248991915, "yes": 0.15431240924525594}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8384047582817399, "res": {"Yes": 0.8384047582817399, "yes": 0.15089321164583186}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9125102871499962, "res": {"Yes": 0.9125102871499962, "yes": 0.07838940508341977}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9021428600774005, "res": {"Yes": 0.9021428600774005, "yes": 0.07966969530982695}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9085001872835796, "res": {"Yes": 0.9085001872835796, "yes": 0.07865942672631575}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8895504644353794, "res": {"Yes": 0.8895504644353794, "yes": 0.0984905179406257}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8903427660556688, "res": {"Yes": 0.8903427660556688, "yes": 0.09183272163879722}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9515240753915846, "res": {"Yes": 0.9515240753915846, "yes": 0.043575770742942756}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9215882303588893, "res": {"Yes": 0.9215882303588893, "yes": 0.07124855077786689}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9209545338636894, "res": {"Yes": 0.9209545338636894, "yes": 0.07289169278698666}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9638304226111558, "res": {"Yes": 0.9638304226111558, "yes": 0.03367616371649824}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.871694570166759, "res": {"Yes": 0.871694570166759, "yes": 0.12066880922184141}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6766244883803457, "res": {"Yes": 0.6766244883803457, "yes": 0.31757575204155036}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5404930286234623, "res": {"Yes": 0.5404930286234623, "yes": 0.44733085069328504}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.734729981041388, "res": {"Yes": 0.734729981041388, "yes": 0.25404280564528486}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7090170888429974, "res": {"Yes": 0.7090170888429974, "yes": 0.27680189753655476}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6614146491367688, "res": {"Yes": 0.6614146491367688, "yes": 0.3233854252852051}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8057519404792564, "res": {"Yes": 0.8057519404792564, "yes": 0.17298807802660313}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8590216788547338, "res": {"Yes": 0.8590216788547338, "yes": 0.12731108492880452}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.800888101639671, "res": {"Yes": 0.800888101639671, "yes": 0.18299070861963265}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8848428733272945, "res": {"Yes": 0.8848428733272945, "yes": 0.09676060893584854}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8106457554364109, "res": {"Yes": 0.8106457554364109, "yes": 0.18156201558025928}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9605484812829778, "res": {"Yes": 0.9605484812829778, "yes": 0.03287411858323447}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8692851341383767, "res": {"Yes": 0.8692851341383767, "yes": 0.12148872729145681}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8761709225608598, "res": {"Yes": 0.8761709225608598, "yes": 0.1142551127113878}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.889936719917549, "res": {"Yes": 0.889936719917549, "yes": 0.10238055264388897}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8224790235092866, "res": {"Yes": 0.8224790235092866, "yes": 0.16868184707361808}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8248742735152149, "res": {"Yes": 0.8248742735152149, "yes": 0.1646527941605137}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7711425521246865, "res": {"Yes": 0.7711425521246865, "yes": 0.21328764037068274}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5807973373119802, "res": {"Yes": 0.5807973373119802, "yes": 0.4108834834227803}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9237286124049439, "res": {"Yes": 0.9237286124049439, "yes": 0.07112163490945521}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.890883243447145, "res": {"Yes": 0.890883243447145, "yes": 0.10197504015557199}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8700395139774546, "res": {"Yes": 0.8700395139774546, "yes": 0.12002508696596158}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8165309594676602, "res": {"Yes": 0.8165309594676602, "yes": 0.17108809152660165}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9031346930048512, "res": {"Yes": 0.9031346930048512, "yes": 0.09225520268337745}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7402036336729851, "res": {"Yes": 0.7402036336729851, "yes": 0.24599809179764537}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.987285594770892, "res": {"Yes": 0.987285594770892, "yes": 0.008269981557478424}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7808443068811053, "res": {"Yes": 0.7808443068811053, "yes": 0.20803130620613206}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.569964751237454, "res": {"Yes": 0.569964751237454, "yes": 0.42276803540846075}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6865402395352114, "res": {"Yes": 0.6865402395352114, "yes": 0.29460924320633447}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6456245087494406, "res": {"Yes": 0.6456245087494406, "yes": 0.34582880832238555}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5573345115403663, "res": {"Yes": 0.5573345115403663, "yes": 0.4337744848900649}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7565977050716558, "res": {"Yes": 0.7565977050716558, "yes": 0.23348685475741845}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8637791640611908, "res": {"Yes": 0.8637791640611908, "yes": 0.1273050505263954}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8530006836716346, "res": {"Yes": 0.8530006836716346, "yes": 0.12617375677303935}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8926154749501898, "res": {"Yes": 0.8926154749501898, "yes": 0.092971433470332}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8867968307952778, "res": {"Yes": 0.8867968307952778, "yes": 0.10016333254193496}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7230376163718198, "res": {"Yes": 0.7230376163718198, "yes": 0.26323421283598863}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5924879522114045, "res": {"Yes": 0.5924879522114045, "yes": 0.40163022177990376}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6097560011195851, "res": {"Yes": 0.6097560011195851, "yes": 0.38264619101609093}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5251153637782119, "res": {"Yes": 0.5251153637782119, "yes": 0.46623555021515695}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5545284866120097, "res": {"Yes": 0.5545284866120097, "yes": 0.4410625838993966}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9622417851319574, "res": {"Yes": 0.9622417851319574, "yes": 0.031197391295067083}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5176096977385701, "res": {"Yes": 0.5176096977385701, "yes": 0.4748064650379118}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7090245264712697, "res": {"Yes": 0.7090245264712697, "yes": 0.28590982931200487}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4203825537533272, "res": {"yes": 0.571546862490802, "Yes": 0.4203825537533272}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5083999439066236, "res": {"Yes": 0.5083999439066236, "yes": 0.48685849636484363}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9829437482893262, "res": {"Yes": 0.9829437482893262, "yes": 0.013717690524107814}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7669003713417539, "res": {"Yes": 0.7669003713417539, "yes": 0.20660399202504476}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7831385977772702, "res": {"Yes": 0.7831385977772702, "yes": 0.17128262654767618}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884196939354158, "res": {"Yes": 0.9884196939354158, "yes": 0.010431693030375562}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9822306376305101, "res": {"Yes": 0.9822306376305101, "yes": 0.01612095984406211}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8479460970585556, "res": {"Yes": 0.8479460970585556, "yes": 0.1473294416590872}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.844971998537058, "res": {"Yes": 0.844971998537058, "yes": 0.15205166721591537}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7637258723701896, "res": {"Yes": 0.7637258723701896, "yes": 0.2312678075344499}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7865356422220963, "res": {"Yes": 0.7865356422220963, "yes": 0.2074031697571456}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8830625436728377, "res": {"Yes": 0.8830625436728377, "yes": 0.1142932002873965}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8910705178770019, "res": {"Yes": 0.8910705178770019, "yes": 0.09591465436073754}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8810844329591578, "res": {"Yes": 0.8810844329591578, "yes": 0.10985501807925667}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8875353122263928, "res": {"Yes": 0.8875353122263928, "yes": 0.1067213550126801}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9117660317086542, "res": {"Yes": 0.9117660317086542, "yes": 0.08338902274857993}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9342841200090442, "res": {"Yes": 0.9342841200090442, "yes": 0.06012088461804164}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8214446340020184, "res": {"Yes": 0.8214446340020184, "yes": 0.16873387657779274}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7839438822157936, "res": {"Yes": 0.7839438822157936, "yes": 0.21046426557694337}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7623560615941423, "res": {"Yes": 0.7623560615941423, "yes": 0.22067677601628283}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9702639966685168, "res": {"Yes": 0.9702639966685168, " Yes": 0.014377166365017305}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8004938334083189, "res": {"Yes": 0.8004938334083189, "yes": 0.190022460623587}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7252609411899764, "res": {"Yes": 0.7252609411899764, "yes": 0.2643263671885303}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7522059220532187, "res": {"Yes": 0.7522059220532187, "yes": 0.22646936433640208}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7319428074296435, "res": {"Yes": 0.7319428074296435, "yes": 0.25637898988995317}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.794141263366435, "res": {"Yes": 0.794141263366435, "yes": 0.1819100217760858}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7132334662638783, "res": {"Yes": 0.7132334662638783, "yes": 0.2652561542660976}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8823433438351419, "res": {"Yes": 0.8823433438351419, "yes": 0.10076297523459644}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8167405163632168, "res": {"Yes": 0.8167405163632168, "yes": 0.17015428409662542}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8000036730597999, "res": {"Yes": 0.8000036730597999, "yes": 0.18497007939204127}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7768135646039332, "res": {"Yes": 0.7768135646039332, "yes": 0.2043166668933036}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7981464795849305, "res": {"Yes": 0.7981464795849305, "yes": 0.190687922660313}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9012351658922413, "res": {"Yes": 0.9012351658922413, "yes": 0.09093501031231657}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9070019671645027, "res": {"Yes": 0.9070019671645027, "yes": 0.08853455067928268}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8742993816911422, "res": {"Yes": 0.8742993816911422, "yes": 0.10946782593268371}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9655790092417339, "res": {"Yes": 0.9655790092417339, "yes": 0.025741113969300736}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9051656485134574, "res": {"Yes": 0.9051656485134574, "yes": 0.08698739986101918}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.860602718276208, "res": {"Yes": 0.860602718276208, "yes": 0.13450886114384533}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9168460135329141, "res": {"Yes": 0.9168460135329141, "yes": 0.0790780373125087}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9146769098971785, "res": {"Yes": 0.9146769098971785, "yes": 0.07905341624538872}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8462108743289702, "res": {"Yes": 0.8462108743289702, "yes": 0.14912013839011315}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8551904297423663, "res": {"Yes": 0.8551904297423663, "yes": 0.13774919154061277}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9423468464493777, "res": {"Yes": 0.9423468464493777, "yes": 0.0552247696943349}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9078544959253313, "res": {"Yes": 0.9078544959253313, "yes": 0.0886894798873371}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9470501248963812, "res": {"Yes": 0.9470501248963812, "yes": 0.04789103274115198}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9479976401722424, "res": {"Yes": 0.9479976401722424, "yes": 0.0485599481491599}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9646637152337877, "res": {"Yes": 0.9646637152337877, "yes": 0.03316902899640138}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9185489925598888, "res": {"Yes": 0.9185489925598888, "yes": 0.06328783173407906}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9579507801932456, "res": {"Yes": 0.9579507801932456, "yes": 0.03938305623385735}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9052618682098118, "res": {"Yes": 0.9052618682098118, "yes": 0.08279709522748194}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8997901819534081, "res": {"Yes": 0.8997901819534081, "yes": 0.08407399290152104}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9543572545250487, "res": {"Yes": 0.9543572545250487, "yes": 0.02566929110961196}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8854492107344202, "res": {"Yes": 0.8854492107344202, "yes": 0.10825356279340417}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6709017473031548, "res": {"Yes": 0.6709017473031548, "yes": 0.3245326926976784}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7450744892725993, "res": {"Yes": 0.7450744892725993, "yes": 0.25060186359888376}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5422230667183552, "res": {"Yes": 0.5422230667183552, "yes": 0.45389066769393827}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6006703484534498, "res": {"Yes": 0.6006703484534498, "yes": 0.3913548835819607}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8169423006953396, "res": {"Yes": 0.8169423006953396, "yes": 0.1761989719605853}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8837134592583873, "res": {"Yes": 0.8837134592583873, "yes": 0.11179342839474235}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6863667795988019, "res": {"Yes": 0.6863667795988019, "yes": 0.3080478656896095}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8718036434148584, "res": {"Yes": 0.8718036434148584, "yes": 0.12339348064305729}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7427034270536312, "res": {"Yes": 0.7427034270536312, "yes": 0.2540324155070138}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7209235576462707, "res": {"Yes": 0.7209235576462707, "yes": 0.2670746958418591}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8771622745782129, "res": {"Yes": 0.8771622745782129, "yes": 0.10716252196587817}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7713087975305086, "res": {"Yes": 0.7713087975305086, "yes": 0.2174804755649966}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8556304186384524, "res": {"Yes": 0.8556304186384524, "yes": 0.12159145296951981}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8948349241583716, "res": {"Yes": 0.8948349241583716, "yes": 0.09040623886246857}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8257548097693125, "res": {"Yes": 0.8257548097693125, "yes": 0.17043274799201288}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7748173614862273, "res": {"Yes": 0.7748173614862273, "yes": 0.21531728381764137}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8171036055364493, "res": {"Yes": 0.8171036055364493, "yes": 0.17846799385038906}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9081129716530836, "res": {"Yes": 0.9081129716530836, "yes": 0.08916179841674943}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9671645137750036, "res": {"Yes": 0.9671645137750036, "yes": 0.026823693398555917}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6913013107274161, "res": {"Yes": 0.6913013107274161, "yes": 0.28091405795628904}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8498706224203186, "res": {"Yes": 0.8498706224203186, "yes": 0.14163741533981467}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8264372724529412, "res": {"Yes": 0.8264372724529412, "yes": 0.16350845131184608}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8027307149756979, "res": {"Yes": 0.8027307149756979, "yes": 0.18596328820349883}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8315098369935096, "res": {"Yes": 0.8315098369935096, "yes": 0.15934170415517995}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8670126602012163, "res": {"Yes": 0.8670126602012163, "yes": 0.11650025699653194}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7338649640328241, "res": {"Yes": 0.7338649640328241, "yes": 0.26001834311834565}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6478184316264608, "res": {"Yes": 0.6478184316264608, "yes": 0.34036435595541936}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867169113136552, "res": {"Yes": 0.9867169113136552, "yes": 0.008411693986610088}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5419445657430544, "res": {"Yes": 0.5419445657430544, "yes": 0.4468214521814471}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8426671742753145, "res": {"Yes": 0.8426671742753145, "yes": 0.1448868189966313}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8988327157208622, "res": {"Yes": 0.8988327157208622, "yes": 0.09496555804617256}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8118460016072029, "res": {"Yes": 0.8118460016072029, "yes": 0.17848794768750026}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8632627181073503, "res": {"Yes": 0.8632627181073503, "yes": 0.12477177630813652}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8852821333789422, "res": {"Yes": 0.8852821333789422, "yes": 0.10967860090101926}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.698280552008405, "res": {"Yes": 0.698280552008405, "yes": 0.26713572938267544}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5235384884393463, "res": {"Yes": 0.5235384884393463, "yes": 0.4532758220921347}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8054117835430762, "res": {"Yes": 0.8054117835430762, "yes": 0.14300152277538664}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3239628274060709, "res": {"yes": 0.6137868874057565, "Yes": 0.3239628274060709}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4761503087081824, "res": {"yes": 0.5158143319780075, "Yes": 0.4761503087081824}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8210626525143525, "res": {"Yes": 0.8210626525143525, "yes": 0.1718179144223616}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8185071391280548, "res": {"Yes": 0.8185071391280548, "yes": 0.1752938448087965}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8619455132526163, "res": {"Yes": 0.8619455132526163, "yes": 0.13249258465365868}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7915762644307041, "res": {"Yes": 0.7915762644307041, "yes": 0.1686454831875568}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8611855694161528, "res": {"Yes": 0.8611855694161528, "yes": 0.13504819017243835}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.37135199224864196, "res": {"yes": 0.6055281963628242, "Yes": 0.37135199224864196}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.44038589603428985, "res": {"yes": 0.5370217191213535, "Yes": 0.44038589603428985}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2665491832156494, "res": {"yes": 0.720681461709621, "Yes": 0.2665491832156494}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5336896439947385, "res": {"Yes": 0.5336896439947385, "yes": 0.45336306795225434}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.786595830232203, "res": {"Yes": 0.786595830232203, "yes": 0.2052004649191959}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7844677552093859, "res": {"Yes": 0.7844677552093859, "yes": 0.2046947160068586}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5841237838305546, "res": {"Yes": 0.5841237838305546, "yes": 0.40306173507336507}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5315939462497133, "res": {"Yes": 0.5315939462497133, "yes": 0.46115789002137986}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5434041418050904, "res": {"Yes": 0.5434041418050904, "yes": 0.448824130279088}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7430877343579158, "res": {"Yes": 0.7430877343579158, "yes": 0.25070565932002087}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8288293959661673, "res": {"Yes": 0.8288293959661673, "yes": 0.15900401905593714}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7920803343908033, "res": {"Yes": 0.7920803343908033, "yes": 0.2014290839975069}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7869086271791714, "res": {"Yes": 0.7869086271791714, "yes": 0.20275952663259353}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8954203555165103, "res": {"Yes": 0.8954203555165103, "yes": 0.10025703907985854}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8850456441441885, "res": {"Yes": 0.8850456441441885, "yes": 0.11025946698466765}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8688476548152991, "res": {"Yes": 0.8688476548152991, "yes": 0.1273540851423624}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8630891765837211, "res": {"Yes": 0.8630891765837211, "yes": 0.1325878015242787}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9206377753856415, "res": {"Yes": 0.9206377753856415, "yes": 0.07680762960098113}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9607166736373728, "res": {"Yes": 0.9607166736373728, "yes": 0.036154928679098385}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9629437404716391, "res": {"Yes": 0.9629437404716391, "yes": 0.035089676801078724}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8587823887496643, "res": {"Yes": 0.8587823887496643, "yes": 0.13827685609180268}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8972410391680999, "res": {"Yes": 0.8972410391680999, "yes": 0.09842971881945793}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.833735269228617, "res": {"Yes": 0.833735269228617, "yes": 0.16200766986901874}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8834200311628333, "res": {"Yes": 0.8834200311628333, "yes": 0.11345624760897002}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8909006559303011, "res": {"Yes": 0.8909006559303011, "yes": 0.10316739662763975}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8952131881695979, "res": {"Yes": 0.8952131881695979, "yes": 0.10069783288456141}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8686075211369598, "res": {"Yes": 0.8686075211369598, "yes": 0.12287372216990124}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6313350972206636, "res": {"Yes": 0.6313350972206636, "yes": 0.3593261525528986}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6804362788476344, "res": {"Yes": 0.6804362788476344, "yes": 0.2918576487101683}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9224534952419575, "res": {"Yes": 0.9224534952419575, "yes": 0.07207031783270071}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8970788739177564, "res": {"Yes": 0.8970788739177564, "yes": 0.0940825208915509}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7475873682091027, "res": {"Yes": 0.7475873682091027, "yes": 0.23653052727341156}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7977904739346139, "res": {"Yes": 0.7977904739346139, "yes": 0.19280163819109705}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8828695979278142, "res": {"Yes": 0.8828695979278142, "yes": 0.10780744066745687}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8587565312012264, "res": {"Yes": 0.8587565312012264, "yes": 0.13368493217381242}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8897313284290012, "res": {"Yes": 0.8897313284290012, "yes": 0.09322547105940099}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8996799770026009, "res": {"Yes": 0.8996799770026009, "yes": 0.08233101412016526}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8208625264649037, "res": {"Yes": 0.8208625264649037, "yes": 0.1660812587258561}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9451311727295447, "res": {"Yes": 0.9451311727295447, "yes": 0.04502393687865459}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6965847801878636, "res": {"Yes": 0.6965847801878636, "yes": 0.26029941874596396}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6605220709892987, "res": {"Yes": 0.6605220709892987, "yes": 0.32869796151146335}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.48082701968114444, "res": {"yes": 0.5016229686210333, "Yes": 0.48082701968114444}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6419858474535719, "res": {"Yes": 0.6419858474535719, "yes": 0.3415366790598261}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.58491233364328, "res": {"Yes": 0.58491233364328, "yes": 0.39794427343336797}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4445798945097915, "res": {"yes": 0.5412288914805403, "Yes": 0.4445798945097915}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7140254735771857, "res": {"Yes": 0.7140254735771857, "yes": 0.27918741062366403}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7447590468227157, "res": {"Yes": 0.7447590468227157, "yes": 0.24855754272247055}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368574781438332, "res": {"Yes": 0.8368574781438332, "yes": 0.15923521982230607}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7683852838115379, "res": {"Yes": 0.7683852838115379, "yes": 0.21807322022366477}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9006086783266806, "res": {"Yes": 0.9006086783266806, "yes": 0.09303315909210802}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8311702345936104, "res": {"Yes": 0.8311702345936104, "yes": 0.15824661632800502}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.730332857196163, "res": {"Yes": 0.730332857196163, "yes": 0.24411930946015903}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7547861763327348, "res": {"Yes": 0.7547861763327348, "yes": 0.23443073684294619}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6661795168117656, "res": {"Yes": 0.6661795168117656, "yes": 0.31646586332750554}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9118010168424764, "res": {"Yes": 0.9118010168424764, "yes": 0.06616500375533522}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.661806726234924, "res": {"Yes": 0.661806726234924, "yes": 0.3311020642258555}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8426897749120017, "res": {"Yes": 0.8426897749120017, "yes": 0.15280394263462094}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5234932723727836, "res": {"Yes": 0.5234932723727836, "yes": 0.4638645441904275}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7310023678826891, "res": {"Yes": 0.7310023678826891, "yes": 0.26355668784556074}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8751552235630318, "res": {"Yes": 0.8751552235630318, "yes": 0.11898120970226811}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8107681884030776, "res": {"Yes": 0.8107681884030776, "yes": 0.1851056381213035}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6759392323183399, "res": {"Yes": 0.6759392323183399, "yes": 0.3146300420717253}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6371462231923342, "res": {"Yes": 0.6371462231923342, "yes": 0.35844774130012946}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7399485368496822, "res": {"Yes": 0.7399485368496822, "yes": 0.25549899571792933}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6443551176076519, "res": {"Yes": 0.6443551176076519, "yes": 0.3504918588584256}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.827632635485911, "res": {"Yes": 0.827632635485911, "yes": 0.1444272728795756}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6973748276203894, "res": {"Yes": 0.6973748276203894, "yes": 0.28244393733537626}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5554002022264186, "res": {"Yes": 0.5554002022264186, "yes": 0.424881148396105}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7046360997493876, "res": {"Yes": 0.7046360997493876, "yes": 0.28391215558158356}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6838091163298372, "res": {"Yes": 0.6838091163298372, "yes": 0.28509158484560226}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8637380837010468, "res": {"Yes": 0.8637380837010468, "yes": 0.1281461332822862}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.828418589057814, "res": {"Yes": 0.828418589057814, "yes": 0.15804253078415442}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8779684850602962, "res": {"Yes": 0.8779684850602962, "yes": 0.11365542449434039}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9461773839945389, "res": {"Yes": 0.9461773839945389, "yes": 0.04682347496511402}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8762694097074296, "res": {"Yes": 0.8762694097074296, "yes": 0.11742772981881655}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9159183507653229, "res": {"Yes": 0.9159183507653229, "yes": 0.08178241778339204}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8559753756655658, "res": {"Yes": 0.8559753756655658, "yes": 0.13909900961886185}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.891821088455553, "res": {"Yes": 0.891821088455553, "yes": 0.1048396121479609}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8961510030653166, "res": {"Yes": 0.8961510030653166, "yes": 0.09785039973177055}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8901449360678999, "res": {"Yes": 0.8901449360678999, "yes": 0.10318484370951868}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8345383343725212, "res": {"Yes": 0.8345383343725212, "yes": 0.16184342879841054}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8925494649062388, "res": {"Yes": 0.8925494649062388, "yes": 0.10391635315991925}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8810990502711509, "res": {"Yes": 0.8810990502711509, "yes": 0.11586871454224121}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8376924254020476, "res": {"Yes": 0.8376924254020476, "yes": 0.1580151443866694}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8324652664332727, "res": {"Yes": 0.8324652664332727, "yes": 0.16386945077816198}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9267986435586779, "res": {"Yes": 0.9267986435586779, "yes": 0.06095161050647067}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.721934589827605, "res": {"Yes": 0.721934589827605, "yes": 0.269363395069669}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7121186487147538, "res": {"Yes": 0.7121186487147538, "yes": 0.28072015375240095}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7911815483817387, "res": {"Yes": 0.7911815483817387, "yes": 0.20299640691968995}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8711319683514022, "res": {"Yes": 0.8711319683514022, "yes": 0.12435685295974137}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.837906977526423, "res": {"Yes": 0.837906977526423, "yes": 0.15081900552751434}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9381700935576197, "res": {"Yes": 0.9381700935576197, "yes": 0.05572345642273001}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8263947781409298, "res": {"Yes": 0.8263947781409298, "yes": 0.16055235343085267}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8792710913749238, "res": {"Yes": 0.8792710913749238, "yes": 0.11259123592602702}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7740271720506653, "res": {"Yes": 0.7740271720506653, "yes": 0.20792881363467744}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7043816875004865, "res": {"Yes": 0.7043816875004865, "yes": 0.2720532390602398}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8028706351049965, "res": {"Yes": 0.8028706351049965, "yes": 0.17878589614048893}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5864782124508645, "res": {"Yes": 0.5864782124508645, "yes": 0.3976580157420752}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8156998684076995, "res": {"Yes": 0.8156998684076995, "yes": 0.17284718474834354}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7931729621261403, "res": {"Yes": 0.7931729621261403, "yes": 0.18791741398142126}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8575964140951151, "res": {"Yes": 0.8575964140951151, "yes": 0.11544001899178881}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8109668266153988, "res": {"Yes": 0.8109668266153988, "yes": 0.17292731798791805}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8148046318492705, "res": {"Yes": 0.8148046318492705, "yes": 0.17599149657956895}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8812383630433832, "res": {"Yes": 0.8812383630433832, "yes": 0.11152678418763153}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7847413468135515, "res": {"Yes": 0.7847413468135515, "yes": 0.20670556284610964}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.899488860293578, "res": {"Yes": 0.899488860293578, "yes": 0.09090017978207997}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5873928691351121, "res": {"Yes": 0.5873928691351121, "yes": 0.40258347996054294}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9474730790891673, "res": {"Yes": 0.9474730790891673, "yes": 0.046119003061543734}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7510353933747088, "res": {"Yes": 0.7510353933747088, "yes": 0.24166836143153905}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9785942723953551, "res": {"Yes": 0.9785942723953551, "yes": 0.012885800632773918}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7794772081307296, "res": {"Yes": 0.7794772081307296, "yes": 0.2097968494612393}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6345318030966803, "res": {"Yes": 0.6345318030966803, "yes": 0.3583122378276897}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4725452591853146, "res": {"yes": 0.5188147478984881, "Yes": 0.4725452591853146}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9644529688824698, "res": {"Yes": 0.9644529688824698, "yes": 0.027668558020596548}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9600000600194468, "res": {"Yes": 0.9600000600194468, "yes": 0.03338985391995686}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8044676778905473, "res": {"Yes": 0.8044676778905473, "yes": 0.18717825141996894}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.804339327267948, "res": {"Yes": 0.804339327267948, "yes": 0.18829215621034767}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.815944574281157, "res": {"Yes": 0.815944574281157, "yes": 0.17638585953301622}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8555842415108594, "res": {"Yes": 0.8555842415108594, "yes": 0.13338543958446378}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8625472922375933, "res": {"Yes": 0.8625472922375933, "yes": 0.12775705836949608}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7308458989540458, "res": {"Yes": 0.7308458989540458, "yes": 0.2647668107027274}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6287711625521363, "res": {"Yes": 0.6287711625521363, "yes": 0.36284236327002645}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7093727004315644, "res": {"Yes": 0.7093727004315644, "yes": 0.2847068569961359}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8759976418512472, "res": {"Yes": 0.8759976418512472, "yes": 0.1188743412104342}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7947279532980241, "res": {"Yes": 0.7947279532980241, "yes": 0.19730567560427054}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8975427692452087, "res": {"Yes": 0.8975427692452087, "yes": 0.09509872656539438}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8397767972613095, "res": {"Yes": 0.8397767972613095, "yes": 0.151610613246808}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7860326562689763, "res": {"Yes": 0.7860326562689763, "yes": 0.19977239209855693}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.907271159937761, "res": {"Yes": 0.907271159937761, "yes": 0.08052781711694292}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8766564644987934, "res": {"Yes": 0.8766564644987934, "yes": 0.11479037960022181}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8492411610022697, "res": {"Yes": 0.8492411610022697, "yes": 0.13559974283156803}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6668659418491246, "res": {"Yes": 0.6668659418491246, "yes": 0.3151959720844362}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9547921797592985, "res": {"Yes": 0.9547921797592985, "yes": 0.03235372234157816}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8444085141348551, "res": {"Yes": 0.8444085141348551, "yes": 0.13806417026286438}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8725142872220722, "res": {"Yes": 0.8725142872220722, "yes": 0.11143405478005139}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8518323191768729, "res": {"Yes": 0.8518323191768729, "yes": 0.13679359432495428}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7535767033586811, "res": {"Yes": 0.7535767033586811, "yes": 0.23732048356175361}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9707838558561417, "res": {"Yes": 0.9707838558561417, "yes": 0.013829609077139728}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.773184382791925, "res": {"Yes": 0.773184382791925, "yes": 0.21776101042594315}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.894153122956745, "res": {"Yes": 0.894153122956745, "yes": 0.10221161070340337}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8031793419975434, "res": {"Yes": 0.8031793419975434, "yes": 0.19220168799352294}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7217512345104709, "res": {"Yes": 0.7217512345104709, "yes": 0.2642563828328364}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7497601901861748, "res": {"Yes": 0.7497601901861748, "yes": 0.24582855915777557}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6355623191385014, "res": {"Yes": 0.6355623191385014, "yes": 0.33655843153304354}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7394493584308008, "res": {"Yes": 0.7394493584308008, "yes": 0.2494267485209153}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7282519030235798, "res": {"Yes": 0.7282519030235798, "yes": 0.26371083450132726}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7187812568678539, "res": {"Yes": 0.7187812568678539, "yes": 0.2715487765345795}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7541842178950008, "res": {"Yes": 0.7541842178950008, "yes": 0.23351464134640382}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.972041368735401, "res": {"Yes": 0.972041368735401, "yes": 0.01941908759446451}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6395795958205559, "res": {"Yes": 0.6395795958205559, "yes": 0.3549467147743219}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8835749568654991, "res": {"Yes": 0.8835749568654991, "yes": 0.10661948476942651}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8359094793913159, "res": {"Yes": 0.8359094793913159, "yes": 0.15404204412982025}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7901269106744265, "res": {"Yes": 0.7901269106744265, "yes": 0.1977356581742183}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8127730822636882, "res": {"Yes": 0.8127730822636882, "yes": 0.178845549064996}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6366104620557637, "res": {"Yes": 0.6366104620557637, "yes": 0.3506779493211472}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8066435870531146, "res": {"Yes": 0.8066435870531146, "yes": 0.1809092598894716}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873015183350184, "res": {"Yes": 0.9873015183350184, "yes": 0.00868861266278931}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9413963633930347, "res": {"Yes": 0.9413963633930347, "yes": 0.04911877177679229}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8563574227915541, "res": {"Yes": 0.8563574227915541, "yes": 0.1347931362266812}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8099808690916029, "res": {"Yes": 0.8099808690916029, "yes": 0.17575849741762314}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8281096134210706, "res": {"Yes": 0.8281096134210706, "yes": 0.16173346119657941}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7095840966136242, "res": {"Yes": 0.7095840966136242, "yes": 0.2787921100983424}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9234604467351671, "res": {"Yes": 0.9234604467351671, "yes": 0.07051087681501293}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8815440559535092, "res": {"Yes": 0.8815440559535092, "yes": 0.11380236220410775}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8655457899398964, "res": {"Yes": 0.8655457899398964, "yes": 0.12318680942133499}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.765095818544314, "res": {"Yes": 0.765095818544314, "yes": 0.21489496609816514}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7495372971268655, "res": {"Yes": 0.7495372971268655, "yes": 0.22327327708550362}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8259074894318158, "res": {"Yes": 0.8259074894318158, "yes": 0.1583424161663138}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273632636544006, "res": {"Yes": 0.9273632636544006, "yes": 0.06141313141508907}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9240103519445665, "res": {"Yes": 0.9240103519445665, "yes": 0.06810695103752193}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.785431209932832, "res": {"Yes": 0.785431209932832, "yes": 0.21114710675852347}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7035448160853138, "res": {"Yes": 0.7035448160853138, "yes": 0.2913728093420737}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7379966084414616, "res": {"Yes": 0.7379966084414616, "yes": 0.2579808039817718}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8320053149668689, "res": {"Yes": 0.8320053149668689, "yes": 0.1621566719982644}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.706000114011169, "res": {"Yes": 0.706000114011169, "yes": 0.2891944441446927}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8180744187913169, "res": {"Yes": 0.8180744187913169, "yes": 0.16958001477148363}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8833428987102714, "res": {"Yes": 0.8833428987102714, "yes": 0.11243733890769175}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5504719597962315, "res": {"Yes": 0.5504719597962315, "yes": 0.43144329101666556}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8474934401444707, "res": {"Yes": 0.8474934401444707, "yes": 0.1378176285589775}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7919544115489401, "res": {"Yes": 0.7919544115489401, "yes": 0.19759661981778115}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8726722353278643, "res": {"Yes": 0.8726722353278643, "yes": 0.11311190958877723}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9491696331293256, "res": {"Yes": 0.9491696331293256, "yes": 0.04429429904240437}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7614536539670318, "res": {"Yes": 0.7614536539670318, "yes": 0.22183582449463296}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7878338985385277, "res": {"Yes": 0.7878338985385277, "yes": 0.2031079028551372}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8969330889179534, "res": {"Yes": 0.8969330889179534, "yes": 0.08709825759270089}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9157177774519042, "res": {"Yes": 0.9157177774519042, "yes": 0.07646968432683303}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9238384687434595, "res": {"Yes": 0.9238384687434595, "yes": 0.07127165330274599}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7890624898040167, "res": {"Yes": 0.7890624898040167, "yes": 0.19983071417150292}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9151154287717069, "res": {"Yes": 0.9151154287717069, "yes": 0.07704784395665994}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8100117785513162, "res": {"Yes": 0.8100117785513162, "yes": 0.17728673241086035}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9095069677279817, "res": {"Yes": 0.9095069677279817, "yes": 0.084561516801852}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9164552728691534, "res": {"Yes": 0.9164552728691534, "yes": 0.07268625332580884}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9003816139743062, "res": {"Yes": 0.9003816139743062, "yes": 0.08985882961168543}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8895152834102203, "res": {"Yes": 0.8895152834102203, "yes": 0.09971190509156197}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8641587488160962, "res": {"Yes": 0.8641587488160962, "yes": 0.12507939291224673}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4883462773654314, "res": {"yes": 0.5016234200819082, "Yes": 0.4883462773654314}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7208748753222, "res": {"Yes": 0.7208748753222, "yes": 0.2730533176824997}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7685696646110409, "res": {"Yes": 0.7685696646110409, "yes": 0.2211353547527328}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9017008786856863, "res": {"Yes": 0.9017008786856863, "yes": 0.09330281500878645}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8827882152279478, "res": {"Yes": 0.8827882152279478, "yes": 0.11303707689188204}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.797604163749672, "res": {"Yes": 0.797604163749672, "yes": 0.19454818671520913}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8470486059221758, "res": {"Yes": 0.8470486059221758, "yes": 0.14455128802137296}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8337447321776248, "res": {"Yes": 0.8337447321776248, "yes": 0.16046153860321893}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9135208948743482, "res": {"Yes": 0.9135208948743482, "yes": 0.08362484617477588}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7892148172378328, "res": {"Yes": 0.7892148172378328, "yes": 0.20588055674095124}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7884482702455227, "res": {"Yes": 0.7884482702455227, "yes": 0.20671610509863994}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.735322132701298, "res": {"Yes": 0.735322132701298, "yes": 0.2594246200699777}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7004719687960185, "res": {"Yes": 0.7004719687960185, "yes": 0.2901137765537782}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8517237685831077, "res": {"Yes": 0.8517237685831077, "yes": 0.14064255194231928}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8482206470861988, "res": {"Yes": 0.8482206470861988, "yes": 0.14296998011874534}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6829860528803546, "res": {"Yes": 0.6829860528803546, "yes": 0.3043998639315886}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7716335328832844, "res": {"Yes": 0.7716335328832844, "yes": 0.21955908560047058}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5908994921987855, "res": {"Yes": 0.5908994921987855, "yes": 0.39647099229136373}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.742493360821355, "res": {"Yes": 0.742493360821355, "yes": 0.24709035488377423}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6787577294684575, "res": {"Yes": 0.6787577294684575, "yes": 0.30318718255578997}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8324548606824783, "res": {"Yes": 0.8324548606824783, "yes": 0.15830099920781907}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.43093750473794934, "res": {"yes": 0.5088426732967418, "Yes": 0.43093750473794934}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4463136290260233, "res": {"yes": 0.4692275720356504, "Yes": 0.4463136290260233}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4646269283672101, "res": {"yes": 0.49104578846800107, "Yes": 0.4646269283672101}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7805423581246572, "res": {"Yes": 0.7805423581246572, "yes": 0.21158933118494372}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7562913979835911, "res": {"Yes": 0.7562913979835911, "yes": 0.23483237904575896}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7881156152706155, "res": {"Yes": 0.7881156152706155, "yes": 0.20364805411067988}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7752477913171202, "res": {"Yes": 0.7752477913171202, "yes": 0.19580034243212477}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.808567779367614, "res": {"Yes": 0.808567779367614, "yes": 0.1846097576440881}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7785842598302853, "res": {"Yes": 0.7785842598302853, "yes": 0.21369006674047575}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6002248163677423, "res": {"Yes": 0.6002248163677423, "yes": 0.38354351367849043}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8423347065585216, "res": {"Yes": 0.8423347065585216, "yes": 0.14934370268846087}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8069823920268333, "res": {"Yes": 0.8069823920268333, "yes": 0.18316683038231002}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7810100664851284, "res": {"Yes": 0.7810100664851284, "yes": 0.2070169510373854}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8827937891703357, "res": {"Yes": 0.8827937891703357, "yes": 0.10990722259854772}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.981560810976968, "res": {"Yes": 0.981560810976968, "yes": 0.012918421022202444}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9838848176648783, "res": {"Yes": 0.9838848176648783, "yes": 0.012877692861638878}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8348446411043391, "res": {"Yes": 0.8348446411043391, "yes": 0.1585197639099836}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9251217995027627, "res": {"Yes": 0.9251217995027627, "yes": 0.07031082563908136}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9121548606303553, "res": {"Yes": 0.9121548606303553, "yes": 0.08197854600790354}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8951797481055345, "res": {"Yes": 0.8951797481055345, "yes": 0.0996614436980608}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8705870715675408, "res": {"Yes": 0.8705870715675408, "yes": 0.12428429947352058}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8859925650532674, "res": {"Yes": 0.8859925650532674, "yes": 0.11010492546158071}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9266119124457922, "res": {"Yes": 0.9266119124457922, "yes": 0.06751696360572411}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.935506832232156, "res": {"Yes": 0.935506832232156, "yes": 0.059271790169654545}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8808703378360122, "res": {"Yes": 0.8808703378360122, "yes": 0.1094581494045801}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8949371738382189, "res": {"Yes": 0.8949371738382189, "yes": 0.0881802644568324}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8407907171310721, "res": {"Yes": 0.8407907171310721, "yes": 0.1489176217632576}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8719222603692803, "res": {"Yes": 0.8719222603692803, "yes": 0.12257947302046425}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8478070029108425, "res": {"Yes": 0.8478070029108425, "yes": 0.1427700335767965}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.760148112363879, "res": {"Yes": 0.760148112363879, "yes": 0.23541981778447318}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5798614751011728, "res": {"Yes": 0.5798614751011728, "yes": 0.3996130560274958}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7484639793959074, "res": {"Yes": 0.7484639793959074, "yes": 0.24227590474412736}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8304303654353304, "res": {"Yes": 0.8304303654353304, "yes": 0.166322251228349}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9433937699314773, "res": {"Yes": 0.9433937699314773, "yes": 0.04438023350499414}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7265363229815062, "res": {"Yes": 0.7265363229815062, "yes": 0.23952684038428176}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9256290393993573, "res": {"Yes": 0.9256290393993573, "yes": 0.061270468854818926}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8975992408225428, "res": {"Yes": 0.8975992408225428, "yes": 0.08901806075553498}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8838737175493334, "res": {"Yes": 0.8838737175493334, "yes": 0.10224997792005823}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.93042910688204, "res": {"Yes": 0.93042910688204, "yes": 0.061450260731065354}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5158032420890868, "res": {"Yes": 0.5158032420890868, "yes": 0.4759462268189178}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9727196896965081, "res": {"Yes": 0.9727196896965081, "yes": 0.0228052045348225}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5896881742604348, "res": {"Yes": 0.5896881742604348, "yes": 0.40056462687943933}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7384942375148862, "res": {"Yes": 0.7384942375148862, "yes": 0.25187083056658455}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743045622199663, "res": {"Yes": 0.9743045622199663, "yes": 0.020406744908690346}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.39628699316624305, "res": {"yes": 0.5987333970465849, "Yes": 0.39628699316624305}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5725441950152808, "res": {"Yes": 0.5725441950152808, "yes": 0.41671435162587656}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6885710987408385, "res": {"Yes": 0.6885710987408385, "yes": 0.2966286972241408}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7192467680936938, "res": {"Yes": 0.7192467680936938, "yes": 0.26714352985985984}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7096689892368531, "res": {"Yes": 0.7096689892368531, "yes": 0.2770463661442666}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.696356128939684, "res": {"Yes": 0.696356128939684, "yes": 0.29439266721363366}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9378982090509383, "res": {"Yes": 0.9378982090509383, "yes": 0.06124823783573392}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8322904171444694, "res": {"Yes": 0.8322904171444694, "yes": 0.16257851901085155}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.91919588424304, "res": {"Yes": 0.91919588424304, "yes": 0.07823999634271629}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9106253205698045, "res": {"Yes": 0.9106253205698045, "yes": 0.07966539326243655}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7220605495232911, "res": {"Yes": 0.7220605495232911, "yes": 0.2613170390023609}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6849855060464809, "res": {"Yes": 0.6849855060464809, "yes": 0.3054564389191546}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8371457833526939, "res": {"Yes": 0.8371457833526939, "yes": 0.14855513651307375}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7956767170435918, "res": {"Yes": 0.7956767170435918, "yes": 0.19575866097636938}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9431096332802993, "res": {"Yes": 0.9431096332802993, "yes": 0.048753611608766304}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6079391404318728, "res": {"Yes": 0.6079391404318728, "yes": 0.3844087651449088}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.49463616840074964, "res": {"Yes": 0.49463616840074964, "yes": 0.49440364514486906}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4270702198399178, "res": {"yes": 0.5610007144112011, "Yes": 0.4270702198399178}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812306056730815, "res": {"Yes": 0.9812306056730815, "yes": 0.015086298970532253}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5221902468139559, "res": {"Yes": 0.5221902468139559, "yes": 0.4664960294929927}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8227961256112258, "res": {"Yes": 0.8227961256112258, "yes": 0.16776821916442677}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7335596223293006, "res": {"Yes": 0.7335596223293006, "yes": 0.2378321514790196}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7509658056592073, "res": {"Yes": 0.7509658056592073, "yes": 0.22252725288502176}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8431644271500504, "res": {"Yes": 0.8431644271500504, "yes": 0.10735942087582302}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8686982866788066, "res": {"Yes": 0.8686982866788066, "yes": 0.12727488280443686}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9681668963329246, "res": {"Yes": 0.9681668963329246, "yes": 0.026351307097680438}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9144368204303146, "res": {"Yes": 0.9144368204303146, "yes": 0.07537868019392464}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8785309249346511, "res": {"Yes": 0.8785309249346511, "yes": 0.11060205606137656}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9240166666528891, "res": {"Yes": 0.9240166666528891, "yes": 0.06207189928146415}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.75741362841045, "res": {"Yes": 0.75741362841045, "yes": 0.22801380582428207}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7089131403780794, "res": {"Yes": 0.7089131403780794, "yes": 0.27491228385406924}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8417508172899442, "res": {"Yes": 0.8417508172899442, "yes": 0.1469603436039429}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7703472436574671, "res": {"Yes": 0.7703472436574671, "yes": 0.22099568556174343}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8391458245912712, "res": {"Yes": 0.8391458245912712, "yes": 0.1481755291795427}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8713829079169988, "res": {"Yes": 0.8713829079169988, "yes": 0.12124142176466327}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6379904528080614, "res": {"Yes": 0.6379904528080614, "yes": 0.3589042281642813}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9681980727771071, "res": {"Yes": 0.9681980727771071, "yes": 0.02946436240469603}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9674858785487167, "res": {"Yes": 0.9674858785487167, "yes": 0.0251147137353394}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9133458353704582, "res": {"Yes": 0.9133458353704582, "yes": 0.07964524046737724}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5046887885349772, "res": {"Yes": 0.5046887885349772, "yes": 0.4853424425885198}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9255016908609699, "res": {"Yes": 0.9255016908609699, "yes": 0.06958429386267956}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9223734629589071, "res": {"Yes": 0.9223734629589071, "yes": 0.07304940397180712}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9382144794346855, "res": {"Yes": 0.9382144794346855, "yes": 0.057353179308699855}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9510266535148459, "res": {"Yes": 0.9510266535148459, "yes": 0.0440377701847535}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9716043215446735, "res": {"Yes": 0.9716043215446735, "yes": 0.024693463648109485}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6168616810688263, "res": {"Yes": 0.6168616810688263, "yes": 0.3785879759332798}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6720958397180544, "res": {"Yes": 0.6720958397180544, "yes": 0.3199286425775249}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.569933666207618, "res": {"Yes": 0.569933666207618, "yes": 0.4159568125898084}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5245498881683871, "res": {"Yes": 0.5245498881683871, "yes": 0.4663090207390276}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6221879882454199, "res": {"Yes": 0.6221879882454199, "yes": 0.37120844417966203}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7530667798912454, "res": {"Yes": 0.7530667798912454, "yes": 0.23718339939378158}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6304719771531634, "res": {"Yes": 0.6304719771531634, "yes": 0.36157344162318633}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8145686985926716, "res": {"Yes": 0.8145686985926716, "yes": 0.17978764042346454}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8653823037819305, "res": {"Yes": 0.8653823037819305, "yes": 0.12961565598585345}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8264547848442876, "res": {"Yes": 0.8264547848442876, "yes": 0.15800566376242778}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9148994504166679, "res": {"Yes": 0.9148994504166679, "yes": 0.07966607042115723}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8537691823534256, "res": {"Yes": 0.8537691823534256, "yes": 0.1394154576203662}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8154707134100203, "res": {"Yes": 0.8154707134100203, "yes": 0.17171621114745794}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9287158996660043, "res": {"Yes": 0.9287158996660043, "yes": 0.0644953597260603}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.859252030626847, "res": {"Yes": 0.859252030626847, "yes": 0.13359212022109873}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.959177882320068, "res": {"Yes": 0.959177882320068, "yes": 0.03589017492192014}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9491163643768206, "res": {"Yes": 0.9491163643768206, "yes": 0.04766354638440487}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9146333265808028, "res": {"Yes": 0.9146333265808028, "yes": 0.0786274817556423}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9192022267165229, "res": {"Yes": 0.9192022267165229, "yes": 0.07596484587373528}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9056456628916696, "res": {"Yes": 0.9056456628916696, "yes": 0.08976696898629054}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8837609424581779, "res": {"Yes": 0.8837609424581779, "yes": 0.11074336295768156}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9073880330629246, "res": {"Yes": 0.9073880330629246, "yes": 0.08675345793737735}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9680415394222395, "res": {"Yes": 0.9680415394222395, "yes": 0.02387206337930804}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9627305654383342, "res": {"Yes": 0.9627305654383342, "yes": 0.03215369194176269}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8745276211019809, "res": {"Yes": 0.8745276211019809, "yes": 0.1116861693700309}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.822856340034917, "res": {"Yes": 0.822856340034917, "yes": 0.1691241361947776}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6383988080103427, "res": {"Yes": 0.6383988080103427, "yes": 0.35405939409233117}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8101112784087343, "res": {"Yes": 0.8101112784087343, "yes": 0.18142429857927503}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7965654985948407, "res": {"Yes": 0.7965654985948407, "yes": 0.19815587708650392}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.708708414562646, "res": {"Yes": 0.708708414562646, "yes": 0.2807679363887566}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8208113636981332, "res": {"Yes": 0.8208113636981332, "yes": 0.1713860439492923}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7320260340581924, "res": {"Yes": 0.7320260340581924, "yes": 0.26221371542520305}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7594992305861118, "res": {"Yes": 0.7594992305861118, "yes": 0.23453430777136533}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7510646392623406, "res": {"Yes": 0.7510646392623406, "yes": 0.24132379488567687}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6988757125980029, "res": {"Yes": 0.6988757125980029, "yes": 0.2894915994019357}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8503134268450174, "res": {"Yes": 0.8503134268450174, "yes": 0.14141540290735022}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.920526853502126, "res": {"Yes": 0.920526853502126, "yes": 0.07388887526839727}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9511176682673572, "res": {"Yes": 0.9511176682673572, "yes": 0.04363382179964478}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9451616319626505, "res": {"Yes": 0.9451616319626505, "yes": 0.049253137884637324}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8884096453772802, "res": {"Yes": 0.8884096453772802, "yes": 0.10387664391866089}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9791975355761159, "res": {"Yes": 0.9791975355761159, "yes": 0.01734971845670817}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6534122055052283, "res": {"Yes": 0.6534122055052283, "yes": 0.3418442688036879}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7393376952205918, "res": {"Yes": 0.7393376952205918, "yes": 0.25825693952443746}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9722668102955953, "res": {"Yes": 0.9722668102955953, "yes": 0.025063434194913876}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7221295457050382, "res": {"Yes": 0.7221295457050382, "yes": 0.2718104855029022}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9560704300920063, "res": {"Yes": 0.9560704300920063, "yes": 0.03869282873973899}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.898227052690047, "res": {"Yes": 0.898227052690047, "yes": 0.09481781172265534}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9482194558546897, "res": {"Yes": 0.9482194558546897, "yes": 0.04748129191232289}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9458294959634245, "res": {"Yes": 0.9458294959634245, "yes": 0.04994893976693669}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9404167160559419, "res": {"Yes": 0.9404167160559419, "yes": 0.054122352954925027}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6475762575921328, "res": {"Yes": 0.6475762575921328, "yes": 0.33583161179878884}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8022180702768908, "res": {"Yes": 0.8022180702768908, "yes": 0.1589439586836565}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8263342221503861, "res": {"Yes": 0.8263342221503861, "yes": 0.16516222603557631}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7696494711937187, "res": {"Yes": 0.7696494711937187, "yes": 0.2205063690349811}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8409686472711619, "res": {"Yes": 0.8409686472711619, "yes": 0.15285338270656373}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.83784687661447, "res": {"Yes": 0.83784687661447, "yes": 0.14696882346041035}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8336791774066533, "res": {"Yes": 0.8336791774066533, "yes": 0.15412207434959022}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8100691861202753, "res": {"Yes": 0.8100691861202753, "yes": 0.18017551857532013}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7529623367721723, "res": {"Yes": 0.7529623367721723, "yes": 0.2396118875075894}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8651794213790107, "res": {"Yes": 0.8651794213790107, "yes": 0.12768384900744634}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7909721424312139, "res": {"Yes": 0.7909721424312139, "yes": 0.20183151778145708}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8447706065656224, "res": {"Yes": 0.8447706065656224, "yes": 0.14880529091363492}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.857166668754104, "res": {"Yes": 0.857166668754104, "yes": 0.1355829837390873}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8947847208532932, "res": {"Yes": 0.8947847208532932, "yes": 0.09756730806739808}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8785070819288994, "res": {"Yes": 0.8785070819288994, "yes": 0.11338986109715238}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7718269284780809, "res": {"Yes": 0.7718269284780809, "yes": 0.22359008081000742}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.557819731848357, "res": {"Yes": 0.557819731848357, "yes": 0.43711135474351576}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7750981596781245, "res": {"Yes": 0.7750981596781245, "yes": 0.2183047841899715}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8396469274249384, "res": {"Yes": 0.8396469274249384, "yes": 0.15436659784439388}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7168362117306434, "res": {"Yes": 0.7168362117306434, "yes": 0.2706241827747281}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9674086598100736, "res": {"Yes": 0.9674086598100736, "yes": 0.026988857050391536}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.873281084144751, "res": {"Yes": 0.873281084144751, "yes": 0.1095395727539268}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.879252108116986, "res": {"Yes": 0.879252108116986, "yes": 0.10992585345169839}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9281363816583758, "res": {"Yes": 0.9281363816583758, "yes": 0.0508232729687901}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8733055538235474, "res": {"Yes": 0.8733055538235474, "yes": 0.11701158874346193}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8691241052909889, "res": {"Yes": 0.8691241052909889, "yes": 0.1255941754285321}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9591123017335713, "res": {"Yes": 0.9591123017335713, "yes": 0.034438773937938975}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9682622199607409, "res": {"Yes": 0.9682622199607409, "yes": 0.02280997816367447}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7023792350177906, "res": {"Yes": 0.7023792350177906, "yes": 0.2923705717445301}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5531435649675793, "res": {"Yes": 0.5531435649675793, "yes": 0.4376806693123954}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7905044839916457, "res": {"Yes": 0.7905044839916457, "yes": 0.1964468043694547}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6320384298242445, "res": {"Yes": 0.6320384298242445, "yes": 0.3547318241193125}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6383059277409806, "res": {"Yes": 0.6383059277409806, "yes": 0.35065084296333887}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6691158064653799, "res": {"Yes": 0.6691158064653799, "yes": 0.32203694628639534}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8479274933252698, "res": {"Yes": 0.8479274933252698, "yes": 0.14568634356433616}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9560674777510766, "res": {"Yes": 0.9560674777510766, "yes": 0.03962395972999871}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9428142202843692, "res": {"Yes": 0.9428142202843692, "yes": 0.04813265664077529}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9471034775208864, "res": {"Yes": 0.9471034775208864, "yes": 0.04466669202391084}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7972103960687817, "res": {"Yes": 0.7972103960687817, "yes": 0.19684788615581175}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.956559365199075, "res": {"Yes": 0.956559365199075, "yes": 0.03508210877403379}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9223698159014447, "res": {"Yes": 0.9223698159014447, "yes": 0.05970634397448467}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8730387036913547, "res": {"Yes": 0.8730387036913547, "yes": 0.11396538694272049}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8833794610295009, "res": {"Yes": 0.8833794610295009, "yes": 0.10062559804064525}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8652061125758699, "res": {"Yes": 0.8652061125758699, "yes": 0.11997936611820212}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.955313309051617, "res": {"Yes": 0.955313309051617, "yes": 0.038368901663852255}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9664643081366191, "res": {"Yes": 0.9664643081366191, "yes": 0.026733874763075306}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7933104547111163, "res": {"Yes": 0.7933104547111163, "yes": 0.20229114649548594}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7964565119941563, "res": {"Yes": 0.7964565119941563, "yes": 0.19790556628761818}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9062330784887414, "res": {"Yes": 0.9062330784887414, "yes": 0.0879744893747425}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8079555131172466, "res": {"Yes": 0.8079555131172466, "yes": 0.1868243141976562}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9673522199330862, "res": {"Yes": 0.9673522199330862, "yes": 0.02410627497427443}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9688663134743098, "res": {"Yes": 0.9688663134743098, "yes": 0.024949517391443618}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9472259364456816, "res": {"Yes": 0.9472259364456816, "yes": 0.03882441413312767}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7101990117383185, "res": {"Yes": 0.7101990117383185, "yes": 0.2784499356631491}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.934204625155429, "res": {"Yes": 0.934204625155429, "yes": 0.05289645265117975}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9570710862354502, "res": {"Yes": 0.9570710862354502, "yes": 0.03760649139512849}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8575419756026443, "res": {"Yes": 0.8575419756026443, "yes": 0.13212669580774067}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9807954557681536, "res": {"Yes": 0.9807954557681536, "yes": 0.016232259331947975}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8884158553824053, "res": {"Yes": 0.8884158553824053, "yes": 0.10022120359436222}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9196800142840391, "res": {"Yes": 0.9196800142840391, "yes": 0.0696055481513899}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9744824816046055, "res": {"Yes": 0.9744824816046055, "yes": 0.018865064623410695}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9274453945817336, "res": {"Yes": 0.9274453945817336, "yes": 0.058469209303524186}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9695115122016474, "res": {"Yes": 0.9695115122016474, "yes": 0.024807826274748694}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7001255380127617, "res": {"Yes": 0.7001255380127617, "yes": 0.2839747651143366}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.351717669676778, "res": {"yes": 0.6425110396665424, "Yes": 0.351717669676778}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7902146670421469, "res": {"Yes": 0.7902146670421469, "yes": 0.1989379722662146}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8840606713064548, "res": {"Yes": 0.8840606713064548, "yes": 0.10525804208289723}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8722959940089263, "res": {"Yes": 0.8722959940089263, "yes": 0.11830014290622555}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9372096554107475, "res": {"Yes": 0.9372096554107475, "yes": 0.056723959010290224}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8994438240138358, "res": {"Yes": 0.8994438240138358, "yes": 0.0986493337708077}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5857245037912301, "res": {"Yes": 0.5857245037912301, "yes": 0.3985610870612349}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5676623052933583, "res": {"Yes": 0.5676623052933583, "yes": 0.414401834982539}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49354144098847225, "res": {"Yes": 0.49354144098847225, "yes": 0.4831510699546491}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2882143225749776, "res": {"yes": 0.700143461455954, "Yes": 0.2882143225749776}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5186629649101872, "res": {"Yes": 0.5186629649101872, "yes": 0.4718847013678686}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8329479031563084, "res": {"Yes": 0.8329479031563084, "yes": 0.16042150844330755}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9438991482733882, "res": {"Yes": 0.9438991482733882, "yes": 0.05122783204509261}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7608705353264629, "res": {"Yes": 0.7608705353264629, "yes": 0.23096372856229375}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8101987103356421, "res": {"Yes": 0.8101987103356421, "yes": 0.1806014336083549}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8551439599570096, "res": {"Yes": 0.8551439599570096, "yes": 0.12912155906375197}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8190195539641322, "res": {"Yes": 0.8190195539641322, "yes": 0.15721740284398208}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8488163070326326, "res": {"Yes": 0.8488163070326326, "yes": 0.12821835115564212}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7111920308939208, "res": {"Yes": 0.7111920308939208, "yes": 0.2717655317663765}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8759052640111067, "res": {"Yes": 0.8759052640111067, "yes": 0.10911233839585115}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7818449826443221, "res": {"Yes": 0.7818449826443221, "yes": 0.2018904208118799}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.531446417508978, "res": {"Yes": 0.531446417508978, "yes": 0.4584138501150385}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8277454991669568, "res": {"Yes": 0.8277454991669568, "yes": 0.16560515637260656}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8610241725306587, "res": {"Yes": 0.8610241725306587, "yes": 0.13176006497255224}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8951586391207809, "res": {"Yes": 0.8951586391207809, "yes": 0.10092187260909384}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8409791089862058, "res": {"Yes": 0.8409791089862058, "yes": 0.15324762528181052}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9253626409765735, "res": {"Yes": 0.9253626409765735, "yes": 0.06283962079911612}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8322591236131068, "res": {"Yes": 0.8322591236131068, "yes": 0.15889397866841984}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8220872796004637, "res": {"Yes": 0.8220872796004637, "yes": 0.16656129486170726}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8639592203162668, "res": {"Yes": 0.8639592203162668, "yes": 0.12075417638599448}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9758898388208048, "res": {"Yes": 0.9758898388208048, "yes": 0.023830233246119013}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8811267260269653, "res": {"Yes": 0.8811267260269653, "yes": 0.11248645101644122}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9387123342603422, "res": {"Yes": 0.9387123342603422, "yes": 0.0594931439976463}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6680379738368978, "res": {"Yes": 0.6680379738368978, "yes": 0.3235366109623237}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7711600418361013, "res": {"Yes": 0.7711600418361013, "yes": 0.21864604271640062}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8880784067483837, "res": {"Yes": 0.8880784067483837, "yes": 0.10841607582537492}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8245134617069518, "res": {"Yes": 0.8245134617069518, "yes": 0.1681185321246667}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7667083481949841, "res": {"Yes": 0.7667083481949841, "yes": 0.22756269412147348}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8773746524995139, "res": {"Yes": 0.8773746524995139, "yes": 0.11673686431307959}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9272110587401035, "res": {"Yes": 0.9272110587401035, "yes": 0.06732450450897559}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9723005096472465, "res": {"Yes": 0.9723005096472465, "yes": 0.0190152650253473}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8492560313451882, "res": {"Yes": 0.8492560313451882, "yes": 0.14231893767405182}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.690075063242342, "res": {"Yes": 0.690075063242342, "yes": 0.3024952814939513}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7795000237625168, "res": {"Yes": 0.7795000237625168, "yes": 0.21449297322681402}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8029627697965016, "res": {"Yes": 0.8029627697965016, "yes": 0.19091837467525904}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7388657787775506, "res": {"Yes": 0.7388657787775506, "yes": 0.251052198329307}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.981263320446827, "res": {"Yes": 0.981263320446827, "yes": 0.015401561170389563}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9767929193837983, "res": {"Yes": 0.9767929193837983, "yes": 0.021302424211424545}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8677866929252303, "res": {"Yes": 0.8677866929252303, "yes": 0.1259672281384867}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9759709044770792, "res": {"Yes": 0.9759709044770792, "yes": 0.017677269035882945}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6218593544867714, "res": {"Yes": 0.6218593544867714, "yes": 0.37385416154797524}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9284744426365813, "res": {"Yes": 0.9284744426365813, "yes": 0.06568601747251517}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9233009788864611, "res": {"Yes": 0.9233009788864611, "yes": 0.06700039259375866}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9094103468403226, "res": {"Yes": 0.9094103468403226, "yes": 0.08138824717479377}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9393441781792656, "res": {"Yes": 0.9393441781792656, "yes": 0.054526726664758496}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9128997511536289, "res": {"Yes": 0.9128997511536289, "yes": 0.0812728716625512}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.885807606833996, "res": {"Yes": 0.885807606833996, "yes": 0.10683417638942366}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9067287340776196, "res": {"Yes": 0.9067287340776196, "yes": 0.08304791903581284}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9057006100808433, "res": {"Yes": 0.9057006100808433, "yes": 0.08725605692196711}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.891831704756978, "res": {"Yes": 0.891831704756978, "yes": 0.10195285280090587}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8889629103319829, "res": {"Yes": 0.8889629103319829, "yes": 0.1014588075317586}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8230962952381503, "res": {"Yes": 0.8230962952381503, "yes": 0.17134320279367254}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5170259563644644, "res": {"Yes": 0.5170259563644644, "yes": 0.4725373441185121}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5965124594905682, "res": {"Yes": 0.5965124594905682, "yes": 0.39803921436342465}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8401124559285225, "res": {"Yes": 0.8401124559285225, "yes": 0.15443878110040754}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5044915644211099, "res": {"Yes": 0.5044915644211099, "yes": 0.49117269109485445}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8702383668286395, "res": {"Yes": 0.8702383668286395, "yes": 0.12481190936444675}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819958428371725, "res": {"Yes": 0.9819958428371725, "yes": 0.01574929778159507}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9416238680132659, "res": {"Yes": 0.9416238680132659, "yes": 0.05313524623652817}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8757670920959435, "res": {"Yes": 0.8757670920959435, "yes": 0.12110685789621034}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8854234179745849, "res": {"Yes": 0.8854234179745849, "yes": 0.10924895612194045}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9647145196214562, "res": {"Yes": 0.9647145196214562, "yes": 0.03199996723785391}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756770126502282, "res": {"Yes": 0.9756770126502282, "yes": 0.01656295009595692}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9494573463130205, "res": {"Yes": 0.9494573463130205, "yes": 0.04630011194843996}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9713227321572885, "res": {"Yes": 0.9713227321572885, "yes": 0.02620273458993491}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9605730111227523, "res": {"Yes": 0.9605730111227523, "yes": 0.03132373694507965}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6726335969911283, "res": {"Yes": 0.6726335969911283, "yes": 0.32367912786571024}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7394951169728552, "res": {"Yes": 0.7394951169728552, "yes": 0.2564530684802695}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7681577217940715, "res": {"Yes": 0.7681577217940715, "yes": 0.2266732105099463}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9818666491613915, "res": {"Yes": 0.9818666491613915, "yes": 0.01139514376346864}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9627073571723351, "res": {"Yes": 0.9627073571723351, "yes": 0.030687597384707794}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7778629399916408, "res": {"Yes": 0.7778629399916408, "yes": 0.2163399426223114}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9888461476596198, "res": {"Yes": 0.9888461476596198, "yes": 0.008059019338720051}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8712953731870126, "res": {"Yes": 0.8712953731870126, "yes": 0.12110861395838128}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8686187696771926, "res": {"Yes": 0.8686187696771926, "yes": 0.12634826208639385}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.745807279966396, "res": {"Yes": 0.745807279966396, "yes": 0.24719848056480392}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9609312909677594, "res": {"Yes": 0.9609312909677594, "yes": 0.02999306772066447}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9759227649235206, "res": {"Yes": 0.9759227649235206, "yes": 0.018503984346603313}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9703253745994256, "res": {"Yes": 0.9703253745994256, "yes": 0.022690401167066217}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7622948163684075, "res": {"Yes": 0.7622948163684075, "yes": 0.23385144866086213}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7714707276688789, "res": {"Yes": 0.7714707276688789, "yes": 0.22164410813170143}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.847751481848279, "res": {"Yes": 0.847751481848279, "yes": 0.1461159053446571}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7325724757368414, "res": {"Yes": 0.7325724757368414, "yes": 0.26301679224560987}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8607076235348599, "res": {"Yes": 0.8607076235348599, "yes": 0.13404606090481447}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8598311209553849, "res": {"Yes": 0.8598311209553849, "yes": 0.1364656560560087}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7794052345284032, "res": {"Yes": 0.7794052345284032, "yes": 0.2138970188737531}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8235203648834223, "res": {"Yes": 0.8235203648834223, "yes": 0.17082893556554452}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7300241523325169, "res": {"Yes": 0.7300241523325169, "yes": 0.2664678448178029}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9822738763748564, "res": {"Yes": 0.9822738763748564, "yes": 0.010835938828344317}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8953383566751364, "res": {"Yes": 0.8953383566751364, "yes": 0.10111121796614415}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.951967775527014, "res": {"Yes": 0.951967775527014, "yes": 0.040030232413960226}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8879244361823015, "res": {"Yes": 0.8879244361823015, "yes": 0.09770974312327692}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8761570090770828, "res": {"Yes": 0.8761570090770828, "yes": 0.11096250805451285}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9130960055248095, "res": {"Yes": 0.9130960055248095, "yes": 0.07801120568366035}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9447891519621987, "res": {"Yes": 0.9447891519621987, "yes": 0.048343756343125724}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9350460252861338, "res": {"Yes": 0.9350460252861338, "yes": 0.050466223906722375}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8837670934557429, "res": {"Yes": 0.8837670934557429, "yes": 0.10973101785794358}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9120965529945673, "res": {"Yes": 0.9120965529945673, "yes": 0.0793244956146126}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8833801191474446, "res": {"Yes": 0.8833801191474446, "yes": 0.11059309765765582}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9194592440085341, "res": {"Yes": 0.9194592440085341, "yes": 0.07406153331683586}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8785728757878853, "res": {"Yes": 0.8785728757878853, "yes": 0.1074404326367342}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9506783115643209, "res": {"Yes": 0.9506783115643209, "yes": 0.03441572489531548}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6677030501395649, "res": {"Yes": 0.6677030501395649, "yes": 0.23665480939923078}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.545329161815168, "res": {"Yes": 0.545329161815168, "yes": 0.3602958786340524}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4919880949690325, "res": {"Yes": 0.4919880949690325, "yes": 0.4191934798035555}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6495162785390207, "res": {"Yes": 0.6495162785390207, "yes": 0.289561172592156}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8391991288269739, "res": {"Yes": 0.8391991288269739, "yes": 0.1499684560500832}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5358935705683148, "res": {"Yes": 0.5358935705683148, "yes": 0.4454830856088564}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8846216318729687, "res": {"Yes": 0.8846216318729687, "yes": 0.10604688196071038}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8547241616935574, "res": {"Yes": 0.8547241616935574, "yes": 0.13908575411487994}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.844539348480771, "res": {"Yes": 0.844539348480771, "yes": 0.14588372188999518}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9090513279565322, "res": {"Yes": 0.9090513279565322, "yes": 0.0836571817903788}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7067832762755205, "res": {"Yes": 0.7067832762755205, "yes": 0.28529350115761304}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5923087161727787, "res": {"Yes": 0.5923087161727787, "yes": 0.39067946023800254}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7959821961767608, "res": {"Yes": 0.7959821961767608, "yes": 0.19588572999456116}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8541682760920768, "res": {"Yes": 0.8541682760920768, "yes": 0.13510805679578655}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.931311147219712, "res": {"Yes": 0.931311147219712, "yes": 0.06109613327471869}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9374569520548369, "res": {"Yes": 0.9374569520548369, "yes": 0.053345881604823446}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.930972025457716, "res": {"Yes": 0.930972025457716, "yes": 0.06046305872644895}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9161303966665256, "res": {"Yes": 0.9161303966665256, "yes": 0.07200001151755882}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9452212707076356, "res": {"Yes": 0.9452212707076356, "yes": 0.05006638270537057}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9090891543692615, "res": {"Yes": 0.9090891543692615, "yes": 0.0763522941471571}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9207224549391739, "res": {"Yes": 0.9207224549391739, "yes": 0.07496784430366664}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9404200893367524, "res": {"Yes": 0.9404200893367524, "yes": 0.05314329152182111}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9307486096739694, "res": {"Yes": 0.9307486096739694, "yes": 0.05667410355529547}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9324353683002546, "res": {"Yes": 0.9324353683002546, "yes": 0.062216956739464174}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.793737886495277, "res": {"Yes": 0.793737886495277, "yes": 0.1987179691390787}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6875723627989752, "res": {"Yes": 0.6875723627989752, "yes": 0.30423842262986434}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9681578391739744, "res": {"Yes": 0.9681578391739744, "yes": 0.022048383289090374}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6856783354394308, "res": {"Yes": 0.6856783354394308, "yes": 0.3051673391130463}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5976307145390078, "res": {"Yes": 0.5976307145390078, "yes": 0.39309321772901307}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7769842337534184, "res": {"Yes": 0.7769842337534184, "yes": 0.20770651110243943}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7974164457750038, "res": {"Yes": 0.7974164457750038, "yes": 0.19070146198346216}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.700588109670957, "res": {"Yes": 0.700588109670957, "yes": 0.2829457771141329}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8603824063666803, "res": {"Yes": 0.8603824063666803, "yes": 0.13115031117594186}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.948964119341548, "res": {"Yes": 0.948964119341548, "yes": 0.04167803614873042}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.81479736382437, "res": {"Yes": 0.81479736382437, "yes": 0.17363478889014808}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7982318379334868, "res": {"Yes": 0.7982318379334868, "yes": 0.19343111978268976}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8886915202763065, "res": {"Yes": 0.8886915202763065, "yes": 0.10713232782174292}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8715678698528415, "res": {"Yes": 0.8715678698528415, "yes": 0.12332528757725779}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8811976331475184, "res": {"Yes": 0.8811976331475184, "yes": 0.1089567243390977}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8541277638514971, "res": {"Yes": 0.8541277638514971, "yes": 0.13214532698531672}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8915787190730026, "res": {"Yes": 0.8915787190730026, "yes": 0.09037980794261401}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.911145576423112, "res": {"Yes": 0.911145576423112, "yes": 0.07254223028582522}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8957652572579154, "res": {"Yes": 0.8957652572579154, "yes": 0.09498735514286534}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8848185086293893, "res": {"Yes": 0.8848185086293893, "yes": 0.10278135643957827}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8988872315782475, "res": {"Yes": 0.8988872315782475, "yes": 0.09082907821351718}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8714595842723932, "res": {"Yes": 0.8714595842723932, "yes": 0.11334859469865315}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9032867801443699, "res": {"Yes": 0.9032867801443699, "yes": 0.08616243372127722}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9118429387697207, "res": {"Yes": 0.9118429387697207, "yes": 0.07471390945203771}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.942612936795412, "res": {"Yes": 0.942612936795412, "yes": 0.048595628533092364}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9034562436057856, "res": {"Yes": 0.9034562436057856, "yes": 0.09016627618847343}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8567432987225491, "res": {"Yes": 0.8567432987225491, "yes": 0.13395011827676623}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.901677429357548, "res": {"Yes": 0.901677429357548, "yes": 0.0895477747527218}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8855210058669619, "res": {"Yes": 0.8855210058669619, "yes": 0.1082634792739239}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9023049987393181, "res": {"Yes": 0.9023049987393181, "yes": 0.08939551194681684}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.82926118761573, "res": {"Yes": 0.82926118761573, "yes": 0.16528379712201108}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8592941951584881, "res": {"Yes": 0.8592941951584881, "yes": 0.12988516046188062}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8373145605824892, "res": {"Yes": 0.8373145605824892, "yes": 0.15649727207257724}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9713868425445851, "res": {"Yes": 0.9713868425445851, "yes": 0.02156448781322999}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8323781285300599, "res": {"Yes": 0.8323781285300599, "yes": 0.15325823038441233}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8019496609346093, "res": {"Yes": 0.8019496609346093, "yes": 0.1805732078101855}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9204162551631199, "res": {"Yes": 0.9204162551631199, "yes": 0.07120680423727474}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8718660667904571, "res": {"Yes": 0.8718660667904571, "yes": 0.11022506037445255}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8796031844229909, "res": {"Yes": 0.8796031844229909, "yes": 0.11143625005255395}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8896062187579208, "res": {"Yes": 0.8896062187579208, "yes": 0.09778461745971913}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7105877603012488, "res": {"Yes": 0.7105877603012488, "yes": 0.2784602385013699}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.719624550944183, "res": {"Yes": 0.719624550944183, "yes": 0.2766929974173778}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8146498581150068, "res": {"Yes": 0.8146498581150068, "yes": 0.17314174235026414}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.838924948527035, "res": {"Yes": 0.838924948527035, "yes": 0.13260006646316438}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6539890439730802, "res": {"Yes": 0.6539890439730802, "yes": 0.32945025750557005}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7055778969249095, "res": {"Yes": 0.7055778969249095, "yes": 0.28249142015206125}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7611359732588691, "res": {"Yes": 0.7611359732588691, "yes": 0.22445366165199684}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.457676365587557, "res": {"yes": 0.5270395425257163, "Yes": 0.457676365587557}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.404704341280783, "res": {"yes": 0.5758666416531504, "Yes": 0.404704341280783}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6249985432802824, "res": {"Yes": 0.6249985432802824, "yes": 0.36224507262980404}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.802184257497838, "res": {"Yes": 0.802184257497838, "yes": 0.18469303542251306}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7599946007525168, "res": {"Yes": 0.7599946007525168, "yes": 0.23014581331473244}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7988560916607363, "res": {"Yes": 0.7988560916607363, "yes": 0.1900368838757038}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9826745421240001, "res": {"Yes": 0.9826745421240001, "yes": 0.011145367926421259}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7673434149075943, "res": {"Yes": 0.7673434149075943, "yes": 0.2258392084405214}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.893783574326046, "res": {"Yes": 0.893783574326046, "yes": 0.10121314897358452}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9141890873369755, "res": {"Yes": 0.9141890873369755, "yes": 0.08345282306731007}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9153191195274009, "res": {"Yes": 0.9153191195274009, "yes": 0.0824533005376246}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9531017337272254, "res": {"Yes": 0.9531017337272254, "yes": 0.044526133167049856}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9035965666773804, "res": {"Yes": 0.9035965666773804, "yes": 0.09196911176424005}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7210039811546639, "res": {"Yes": 0.7210039811546639, "yes": 0.2610001231031438}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8215106890207449, "res": {"Yes": 0.8215106890207449, "yes": 0.1620065034179949}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7925975573895347, "res": {"Yes": 0.7925975573895347, "yes": 0.19496857251438865}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.934145645074118, "res": {"Yes": 0.934145645074118, "yes": 0.058085924953874435}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7726390394985163, "res": {"Yes": 0.7726390394985163, "yes": 0.2167656636408786}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8776869393805692, "res": {"Yes": 0.8776869393805692, "yes": 0.10946438869691434}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7044328979106436, "res": {"Yes": 0.7044328979106436, "yes": 0.26986976181680267}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.733598296612035, "res": {"Yes": 0.733598296612035, "yes": 0.23915342198735434}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8689129859265571, "res": {"Yes": 0.8689129859265571, "yes": 0.11367957883835358}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9205304822261348, "res": {"Yes": 0.9205304822261348, "yes": 0.06913420247302655}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7650613441034453, "res": {"Yes": 0.7650613441034453, "yes": 0.21473430623926335}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8510642958266147, "res": {"Yes": 0.8510642958266147, "yes": 0.1272465672932303}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7650866757039109, "res": {"Yes": 0.7650866757039109, "yes": 0.21235947574043637}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9011365220898565, "res": {"Yes": 0.9011365220898565, "yes": 0.08319819717162653}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8710003589649854, "res": {"Yes": 0.8710003589649854, "yes": 0.11487687228319206}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6580811677640113, "res": {"Yes": 0.6580811677640113, "yes": 0.3104659330664583}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5995659933035073, "res": {"Yes": 0.5995659933035073, "yes": 0.3894561890390979}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8327444971298585, "res": {"Yes": 0.8327444971298585, "yes": 0.14889874020593405}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7293179767313822, "res": {"Yes": 0.7293179767313822, "yes": 0.25042204612540436}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7825913673480912, "res": {"Yes": 0.7825913673480912, "yes": 0.20715990417328478}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9565106153933052, "res": {"Yes": 0.9565106153933052, "yes": 0.037180583607471375}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9279338231495186, "res": {"Yes": 0.9279338231495186, "yes": 0.061155217347874324}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9353994796280992, "res": {"Yes": 0.9353994796280992, "yes": 0.05578495888295536}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9813310446718544, "res": {"Yes": 0.9813310446718544, "yes": 0.015327081232784437}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9349364873206181, "res": {"Yes": 0.9349364873206181, "yes": 0.0553711543739471}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.828384649443478, "res": {"Yes": 0.828384649443478, "yes": 0.16338047430400593}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8536301573624965, "res": {"Yes": 0.8536301573624965, "yes": 0.13812451749017807}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9452336248303778, "res": {"Yes": 0.9452336248303778, "yes": 0.04770232214441184}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8968224267590651, "res": {"Yes": 0.8968224267590651, "yes": 0.09219377195464563}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9131485922379875, "res": {"Yes": 0.9131485922379875, "yes": 0.07967742364509023}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6634344263572839, "res": {"Yes": 0.6634344263572839, "yes": 0.2639536128964436}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3597146862323218, "res": {"yes": 0.5909193703920523, "Yes": 0.3597146862323218}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6090103599529119, "res": {"Yes": 0.6090103599529119, "yes": 0.381811418926985}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6155784586968188, "res": {"Yes": 0.6155784586968188, "yes": 0.32069190957321897}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.28316978903196505, "res": {"yes": 0.5607430498730767, "Yes": 0.28316978903196505}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8468459314417474, "res": {"Yes": 0.8468459314417474, "yes": 0.1482330620863849}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9056066485168879, "res": {"Yes": 0.9056066485168879, "yes": 0.08407852461186695}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8873197826989999, "res": {"Yes": 0.8873197826989999, "yes": 0.10746921978450383}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8873513008574406, "res": {"Yes": 0.8873513008574406, "yes": 0.10512007613274993}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8384262887923831, "res": {"Yes": 0.8384262887923831, "yes": 0.1546167043237098}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6595945835827379, "res": {"Yes": 0.6595945835827379, "yes": 0.3285314524430006}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7678114375529637, "res": {"Yes": 0.7678114375529637, "yes": 0.22634060782428841}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8353953526151113, "res": {"Yes": 0.8353953526151113, "yes": 0.16087677173672413}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5589259406636378, "res": {"Yes": 0.5589259406636378, "yes": 0.4336507466816205}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7905257093219897, "res": {"Yes": 0.7905257093219897, "yes": 0.1963400841744634}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9302695836015995, "res": {"Yes": 0.9302695836015995, "yes": 0.059786487804741724}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8903188606733314, "res": {"Yes": 0.8903188606733314, "yes": 0.103613463607413}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9831552563041769, "res": {"Yes": 0.9831552563041769, "yes": 0.012156053534261671}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.951997271753524, "res": {"Yes": 0.951997271753524, "yes": 0.04164470705322719}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9398473410924839, "res": {"Yes": 0.9398473410924839, "yes": 0.051635931599781436}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7661508473301534, "res": {"Yes": 0.7661508473301534, "yes": 0.2261260707647127}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6875959950672027, "res": {"Yes": 0.6875959950672027, "yes": 0.3080543963735871}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8689082242964411, "res": {"Yes": 0.8689082242964411, "yes": 0.11623689175142346}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6758754266664831, "res": {"Yes": 0.6758754266664831, "yes": 0.3182744557611729}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.49682807911615817, "res": {"Yes": 0.49682807911615817, "yes": 0.49456510912710067}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9147402351716298, "res": {"Yes": 0.9147402351716298, "yes": 0.07759432390673456}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8594898271550631, "res": {"Yes": 0.8594898271550631, "yes": 0.12863909612083804}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8439742198956725, "res": {"Yes": 0.8439742198956725, "yes": 0.147622786300686}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9049159658571547, "res": {"Yes": 0.9049159658571547, "yes": 0.08680398050628123}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8987838978464996, "res": {"Yes": 0.8987838978464996, "yes": 0.09012151073190104}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6628424824866341, "res": {"Yes": 0.6628424824866341, "yes": 0.3340127877706106}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9614195065446054, "res": {"Yes": 0.9614195065446054, "yes": 0.03532136702511605}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9347285525240691, "res": {"Yes": 0.9347285525240691, "yes": 0.05883567443336805}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9556152902292547, "res": {"Yes": 0.9556152902292547, "yes": 0.037622571607100336}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6703784803808762, "res": {"Yes": 0.6703784803808762, "yes": 0.32341854164828315}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9267524623328841, "res": {"Yes": 0.9267524623328841, "yes": 0.06445131798999897}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8077259247864658, "res": {"Yes": 0.8077259247864658, "yes": 0.18397787006356572}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8031427499802944, "res": {"Yes": 0.8031427499802944, "yes": 0.18311478176557416}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7095198112066439, "res": {"Yes": 0.7095198112066439, "yes": 0.2813278116734603}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8951709619594249, "res": {"Yes": 0.8951709619594249, "yes": 0.0924274376638319}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9491937964482376, "res": {"Yes": 0.9491937964482376, "yes": 0.045762088163871895}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9089277053824557, "res": {"Yes": 0.9089277053824557, "yes": 0.0848892341455913}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.847741461484984, "res": {"Yes": 0.847741461484984, "yes": 0.1462830259356326}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8720193455983842, "res": {"Yes": 0.8720193455983842, "yes": 0.12301671974027346}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9249611429256535, "res": {"Yes": 0.9249611429256535, "yes": 0.06821116621786043}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.516472688420311, "res": {"Yes": 0.516472688420311, "yes": 0.47624683415828145}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9525808154153916, "res": {"Yes": 0.9525808154153916, "yes": 0.043165414269211534}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9553905588089525, "res": {"Yes": 0.9553905588089525, "yes": 0.03832427391002293}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9391402686933585, "res": {"Yes": 0.9391402686933585, "yes": 0.05193413488967858}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6757827704972127, "res": {"Yes": 0.6757827704972127, "yes": 0.3188964618133255}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8025074263516794, "res": {"Yes": 0.8025074263516794, "yes": 0.18312353486133987}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8620231952825703, "res": {"Yes": 0.8620231952825703, "yes": 0.11952283954179893}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6577520121009337, "res": {"Yes": 0.6577520121009337, "yes": 0.3173908497723286}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7716637737940049, "res": {"Yes": 0.7716637737940049, "yes": 0.21351384513682142}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8234661378528393, "res": {"Yes": 0.8234661378528393, "yes": 0.15028531373571444}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9712162334298763, "res": {"Yes": 0.9712162334298763, "yes": 0.0166571554186498}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772073393844662, "res": {"Yes": 0.9772073393844662, "yes": 0.015402280440091044}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7896645057400473, "res": {"Yes": 0.7896645057400473, "yes": 0.1867866915695026}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8376061977819884, "res": {"Yes": 0.8376061977819884, "yes": 0.15557899032073827}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.705047247982929, "res": {"Yes": 0.705047247982929, "yes": 0.2747168357473772}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7144475301986, "res": {"Yes": 0.7144475301986, "yes": 0.27962535209614836}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9805239767986496, "res": {"Yes": 0.9805239767986496, "yes": 0.017169864427464404}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8272274926374382, "res": {"Yes": 0.8272274926374382, "yes": 0.16859971901389892}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8288636356157297, "res": {"Yes": 0.8288636356157297, "yes": 0.1669443592876065}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871755777944705, "res": {"Yes": 0.9871755777944705, "yes": 0.008189356136659772}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9018488094378738, "res": {"Yes": 0.9018488094378738, "yes": 0.0914954531760848}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9837022905121376, "res": {"Yes": 0.9837022905121376, "yes": 0.009466064818638757}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8009809459695664, "res": {"Yes": 0.8009809459695664, "yes": 0.19218702356418416}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9307510891915683, "res": {"Yes": 0.9307510891915683, "yes": 0.06312433033483206}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.921251092122799, "res": {"Yes": 0.921251092122799, "yes": 0.0705903591328156}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7215815346609175, "res": {"Yes": 0.7215815346609175, "yes": 0.2667543450728611}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7415277232784249, "res": {"Yes": 0.7415277232784249, "yes": 0.24485683161446198}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7670410797906716, "res": {"Yes": 0.7670410797906716, "yes": 0.22290067730281}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6750337900697113, "res": {"Yes": 0.6750337900697113, "yes": 0.313094380108882}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.510284610049576, "res": {"Yes": 0.510284610049576, "yes": 0.47186501003014314}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.57150728995606, "res": {"Yes": 0.57150728995606, "yes": 0.42443322003972245}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.95291918838601, "res": {"Yes": 0.95291918838601, "yes": 0.04140125022232305}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9791114678954687, "res": {"Yes": 0.9791114678954687, "yes": 0.016991059245656148}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.97684478651124, "res": {"Yes": 0.97684478651124, "yes": 0.017937334714991475}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.982016653513583, "res": {"Yes": 0.982016653513583, "yes": 0.015541615218959215}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.794116677133518, "res": {"Yes": 0.794116677133518, "yes": 0.19909264695705894}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6555320060790842, "res": {"Yes": 0.6555320060790842, "yes": 0.3284921295816633}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6058672720025419, "res": {"Yes": 0.6058672720025419, "yes": 0.3804826121866908}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6336138877023433, "res": {"Yes": 0.6336138877023433, "yes": 0.35763595336742926}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6368651762821691, "res": {"Yes": 0.6368651762821691, "yes": 0.3511988822098557}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8230199154460294, "res": {"Yes": 0.8230199154460294, "yes": 0.17411346405800765}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7818581646618536, "res": {"Yes": 0.7818581646618536, "yes": 0.21198645454649906}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7920148954168972, "res": {"Yes": 0.7920148954168972, "yes": 0.2043960388997708}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978769999665818, "res": {"Yes": 0.978769999665818, "yes": 0.018636743312197522}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7235482828718284, "res": {"Yes": 0.7235482828718284, "yes": 0.26806979587727864}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8085022395197844, "res": {"Yes": 0.8085022395197844, "yes": 0.17935121510105584}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7377357579910877, "res": {"Yes": 0.7377357579910877, "yes": 0.24908736016815397}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.648682199113881, "res": {"Yes": 0.648682199113881, "yes": 0.33722487037963556}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8583720918658743, "res": {"Yes": 0.8583720918658743, "yes": 0.13265742839553343}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9132717605471317, "res": {"Yes": 0.9132717605471317, "yes": 0.08368875160033844}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8059256470889719, "res": {"Yes": 0.8059256470889719, "yes": 0.18840550453304444}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8000229613808771, "res": {"Yes": 0.8000229613808771, "yes": 0.1970617987891933}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7923809542274857, "res": {"Yes": 0.7923809542274857, "yes": 0.20303386321200334}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6944045939128319, "res": {"Yes": 0.6944045939128319, "yes": 0.28829174733970164}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.704877570581458, "res": {"Yes": 0.704877570581458, "yes": 0.28622112573392616}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9023437265009558, "res": {"Yes": 0.9023437265009558, "yes": 0.0926382088375023}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8267594308904438, "res": {"Yes": 0.8267594308904438, "yes": 0.16401378496921512}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5907188463885896, "res": {"Yes": 0.5907188463885896, "yes": 0.40335807014585046}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8439489179278383, "res": {"Yes": 0.8439489179278383, "yes": 0.1476982112818286}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7789037006064917, "res": {"Yes": 0.7789037006064917, "yes": 0.21392108364199425}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8163772619817702, "res": {"Yes": 0.8163772619817702, "yes": 0.17467403484357374}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8306508241295151, "res": {"Yes": 0.8306508241295151, "yes": 0.15158757018491156}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7505778518330537, "res": {"Yes": 0.7505778518330537, "yes": 0.2287168965120751}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.907847323903143, "res": {"Yes": 0.907847323903143, "yes": 0.08034069494810309}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8578465342524264, "res": {"Yes": 0.8578465342524264, "yes": 0.13574423348529968}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8933995712253866, "res": {"Yes": 0.8933995712253866, "yes": 0.09421544040034917}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9246717817291988, "res": {"Yes": 0.9246717817291988, "yes": 0.06614853733523153}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9154493741285834, "res": {"Yes": 0.9154493741285834, "yes": 0.07664787480583171}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9304035055469597, "res": {"Yes": 0.9304035055469597, "yes": 0.059323883315882765}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9314229298915453, "res": {"Yes": 0.9314229298915453, "yes": 0.060321360549980295}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9285036287609271, "res": {"Yes": 0.9285036287609271, "yes": 0.06377322165600278}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9015150248296009, "res": {"Yes": 0.9015150248296009, "yes": 0.08884737224515316}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9093903345764258, "res": {"Yes": 0.9093903345764258, "yes": 0.08338331913448975}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9155533888179276, "res": {"Yes": 0.9155533888179276, "yes": 0.07849502751170609}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8537674321283958, "res": {"Yes": 0.8537674321283958, "yes": 0.1386740588216322}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7150058389198536, "res": {"Yes": 0.7150058389198536, "yes": 0.28209223134778905}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.51286665467862, "res": {"Yes": 0.51286665467862, "yes": 0.4650522798172552}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6324351078047211, "res": {"Yes": 0.6324351078047211, "yes": 0.35904404858941147}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39511717993975193, "res": {"yes": 0.5912802072469318, "Yes": 0.39511717993975193}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9686067860479713, "res": {"Yes": 0.9686067860479713, "yes": 0.02238910102308511}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7568645435979539, "res": {"Yes": 0.7568645435979539, "yes": 0.21792668066249476}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9179175841557617, "res": {"Yes": 0.9179175841557617, "yes": 0.07242138139893978}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8412995241821977, "res": {"Yes": 0.8412995241821977, "yes": 0.14394391995735845}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8720430997288882, "res": {"Yes": 0.8720430997288882, "yes": 0.11626919847245215}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9274559489703783, "res": {"Yes": 0.9274559489703783, "yes": 0.06628484434519358}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8493999839488597, "res": {"Yes": 0.8493999839488597, "yes": 0.14072200929229972}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8932564958078444, "res": {"Yes": 0.8932564958078444, "yes": 0.10098189859850533}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.885305305942053, "res": {"Yes": 0.885305305942053, "yes": 0.10675229801847029}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9797621704344625, "res": {"Yes": 0.9797621704344625, "yes": 0.010882597774672069}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7346973009786413, "res": {"Yes": 0.7346973009786413, "yes": 0.25390774159795604}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8926834769872362, "res": {"Yes": 0.8926834769872362, "yes": 0.10166351413823346}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9434209296856689, "res": {"Yes": 0.9434209296856689, "yes": 0.04459050656056064}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.951955676093479, "res": {"Yes": 0.951955676093479, "yes": 0.04404506344336124}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9179038154952646, "res": {"Yes": 0.9179038154952646, "yes": 0.07967980603567376}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9124171884749079, "res": {"Yes": 0.9124171884749079, "yes": 0.08435008181870965}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7086871748897395, "res": {"Yes": 0.7086871748897395, "yes": 0.28336473355349256}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7853401680238676, "res": {"Yes": 0.7853401680238676, "yes": 0.20979435289358533}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7770275285211042, "res": {"Yes": 0.7770275285211042, "yes": 0.21967811887997082}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7878746699977295, "res": {"Yes": 0.7878746699977295, "yes": 0.20801181458593718}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7649949320108222, "res": {"Yes": 0.7649949320108222, "yes": 0.2203838897242485}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7433795130101543, "res": {"Yes": 0.7433795130101543, "yes": 0.2158378818694508}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7363508319360383, "res": {"Yes": 0.7363508319360383, "yes": 0.23469626269633112}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7534929781026136, "res": {"Yes": 0.7534929781026136, "yes": 0.22464093408756536}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7763129143007991, "res": {"Yes": 0.7763129143007991, "yes": 0.20667048788828898}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824431053069381, "res": {"Yes": 0.9824431053069381, "yes": 0.010026039704450535}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8649147602236446, "res": {"Yes": 0.8649147602236446, "yes": 0.12692021732113176}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.902585184474439, "res": {"Yes": 0.902585184474439, "yes": 0.08769301756924641}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8737730108885593, "res": {"Yes": 0.8737730108885593, "yes": 0.11826934155952219}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9186079340655454, "res": {"Yes": 0.9186079340655454, "yes": 0.07420545603819868}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.910613855869189, "res": {"Yes": 0.910613855869189, "yes": 0.08266454393670294}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5788677896828096, "res": {"Yes": 0.5788677896828096, "yes": 0.41721927335609393}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5280080548471016, "res": {"Yes": 0.5280080548471016, "yes": 0.467390538460253}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9572031232988435, "res": {"Yes": 0.9572031232988435, "yes": 0.03600903859341613}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769251011068499, "res": {"Yes": 0.9769251011068499, "yes": 0.017767203876222038}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7126379511010413, "res": {"Yes": 0.7126379511010413, "yes": 0.28344694958755556}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8260570417804178, "res": {"Yes": 0.8260570417804178, "yes": 0.15923706696156942}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7456321998866603, "res": {"Yes": 0.7456321998866603, "yes": 0.2385719111128458}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.835564019253535, "res": {"Yes": 0.835564019253535, "yes": 0.15153034152386094}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9006858322737588, "res": {"Yes": 0.9006858322737588, "yes": 0.0924197018110494}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7618429857302113, "res": {"Yes": 0.7618429857302113, "yes": 0.22153160797858334}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6254476159901549, "res": {"Yes": 0.6254476159901549, "yes": 0.3661152150133191}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7400324961735956, "res": {"Yes": 0.7400324961735956, "yes": 0.25439054858368654}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7830466313807198, "res": {"Yes": 0.7830466313807198, "yes": 0.21147169906851318}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8477451576458137, "res": {"Yes": 0.8477451576458137, "yes": 0.14777586186646435}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7389273436389948, "res": {"Yes": 0.7389273436389948, "yes": 0.25507218383979846}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8233637215040179, "res": {"Yes": 0.8233637215040179, "yes": 0.1666696282309881}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7098435608893876, "res": {"Yes": 0.7098435608893876, "yes": 0.2690043845690329}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6998978942046894, "res": {"Yes": 0.6998978942046894, "yes": 0.287638556265933}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8916867635619323, "res": {"Yes": 0.8916867635619323, "yes": 0.09500889121729801}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7890676739616046, "res": {"Yes": 0.7890676739616046, "yes": 0.19053562386157982}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9711405669783979, "res": {"Yes": 0.9711405669783979, "yes": 0.02307145130544268}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9799341876810952, "res": {"Yes": 0.9799341876810952, "yes": 0.014836848930635103}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6988384355616803, "res": {"Yes": 0.6988384355616803, "yes": 0.29147646908514907}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8329333516835478, "res": {"Yes": 0.8329333516835478, "yes": 0.15495381100312658}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4838142968912858, "res": {"yes": 0.5072647106034137, "Yes": 0.4838142968912858}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.922008386020488, "res": {"Yes": 0.922008386020488, "yes": 0.07042658917352737}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8974730508307921, "res": {"Yes": 0.8974730508307921, "yes": 0.09980694588903799}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8074643931724674, "res": {"Yes": 0.8074643931724674, "yes": 0.18662601209303364}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9780070288597367, "res": {"Yes": 0.9780070288597367, "yes": 0.014037757715671154}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7663790563564246, "res": {"Yes": 0.7663790563564246, "yes": 0.23141616714584654}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.820335061990111, "res": {"Yes": 0.820335061990111, "yes": 0.17128070819300162}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9097238655230483, "res": {"Yes": 0.9097238655230483, "yes": 0.08119522816355523}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7395606983076618, "res": {"Yes": 0.7395606983076618, "yes": 0.24806767176977443}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9512466561778143, "res": {"Yes": 0.9512466561778143, "yes": 0.043567971378043746}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9486303609398253, "res": {"Yes": 0.9486303609398253, "yes": 0.048705564238966007}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7983122079606892, "res": {"Yes": 0.7983122079606892, "yes": 0.18504376764870525}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8979990666910689, "res": {"Yes": 0.8979990666910689, "yes": 0.09128815837110213}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7970221650311673, "res": {"Yes": 0.7970221650311673, "yes": 0.1810148150429328}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8586844587901237, "res": {"Yes": 0.8586844587901237, "yes": 0.1293328713589803}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8387422589571982, "res": {"Yes": 0.8387422589571982, "yes": 0.14399196205882706}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8366778408633808, "res": {"Yes": 0.8366778408633808, "yes": 0.14755628245762}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9822320206122216, "res": {"Yes": 0.9822320206122216, "yes": 0.012535470665142585}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9811883695106947, "res": {"Yes": 0.9811883695106947, "yes": 0.011822592020151325}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7484669358344649, "res": {"Yes": 0.7484669358344649, "yes": 0.24261050055499597}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9729899127629248, "res": {"Yes": 0.9729899127629248, "yes": 0.018186611272256552}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7910523115203493, "res": {"Yes": 0.7910523115203493, "yes": 0.1966665743809497}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8924095062735954, "res": {"Yes": 0.8924095062735954, "yes": 0.09806367170569137}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8679401398554825, "res": {"Yes": 0.8679401398554825, "yes": 0.12446733089011101}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8584090627480332, "res": {"Yes": 0.8584090627480332, "yes": 0.13169517594802566}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7818533718859941, "res": {"Yes": 0.7818533718859941, "yes": 0.21026027729831476}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9331222106030458, "res": {"Yes": 0.9331222106030458, "yes": 0.05633066053370645}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5750754956619881, "res": {"Yes": 0.5750754956619881, "yes": 0.4203339771522256}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.628312666387538, "res": {"Yes": 0.628312666387538, "yes": 0.36785365372647666}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5010751094861453, "res": {"Yes": 0.5010751094861453, "yes": 0.493638900367356}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8162967874779374, "res": {"Yes": 0.8162967874779374, "yes": 0.1765326912876131}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.593073269666896, "res": {"Yes": 0.593073269666896, "yes": 0.3772817021527874}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7923873883869565, "res": {"Yes": 0.7923873883869565, "yes": 0.18074720163990704}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9068767551165025, "res": {"Yes": 0.9068767551165025, "yes": 0.07257701536834232}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8956696985345942, "res": {"Yes": 0.8956696985345942, "yes": 0.08818572298414196}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8614758464752902, "res": {"Yes": 0.8614758464752902, "yes": 0.10390010191316562}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.90190668744236, "res": {"Yes": 0.90190668744236, "yes": 0.09125862230224692}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8246318126173605, "res": {"Yes": 0.8246318126173605, "yes": 0.162736654704339}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6554491389610166, "res": {"Yes": 0.6554491389610166, "yes": 0.33238125848704625}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.903481835269795, "res": {"Yes": 0.903481835269795, "yes": 0.09332390382792526}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925562072841417, "res": {"Yes": 0.9925562072841417, "yes": 0.00381521980467929}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7766200300805408, "res": {"Yes": 0.7766200300805408, "yes": 0.21932665173570806}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5981031190587383, "res": {"Yes": 0.5981031190587383, "yes": 0.3968109929294102}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7762009159532788, "res": {"Yes": 0.7762009159532788, "yes": 0.21716477104564122}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8722412068181524, "res": {"Yes": 0.8722412068181524, "yes": 0.12260150253105832}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7955162054580789, "res": {"Yes": 0.7955162054580789, "yes": 0.19802266205773728}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7293044844736164, "res": {"Yes": 0.7293044844736164, "yes": 0.2525851902601662}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.886862371936937, "res": {"Yes": 0.886862371936937, "yes": 0.09999863930866207}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9807057877003722, "res": {"Yes": 0.9807057877003722, " Yes": 0.011298997206379405}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8134524867423798, "res": {"Yes": 0.8134524867423798, "yes": 0.14990480795075478}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8639985486350814, "res": {"Yes": 0.8639985486350814, "yes": 0.1211243712140445}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7588201395183252, "res": {"Yes": 0.7588201395183252, "yes": 0.23139330435797287}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8359745491176727, "res": {"Yes": 0.8359745491176727, "yes": 0.1576710938761167}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6745247298723402, "res": {"Yes": 0.6745247298723402, "yes": 0.3225623268075927}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7333137010189453, "res": {"Yes": 0.7333137010189453, "yes": 0.26249165183224465}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8675189786448727, "res": {"Yes": 0.8675189786448727, "yes": 0.12912607839740708}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6838079333410892, "res": {"Yes": 0.6838079333410892, "yes": 0.23505428300541634}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.749857155443608, "res": {"Yes": 0.749857155443608, "yes": 0.13024786370243252}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5924572265829253, "res": {"Yes": 0.5924572265829253, "yes": 0.3313740769141212}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9008892571241135, "res": {"Yes": 0.9008892571241135, "yes": 0.09368573898379222}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9113428662637275, "res": {"Yes": 0.9113428662637275, "yes": 0.0508292145566792}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.920371612375765, "res": {"Yes": 0.920371612375765, " Yes": 0.061046567769355714}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8983128554359938, "res": {"Yes": 0.8983128554359938, "yes": 0.09861780061964398}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9691058468321039, "res": {"Yes": 0.9691058468321039, " Yes": 0.016047455143743117}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9293396860638103, "res": {"Yes": 0.9293396860638103, "yes": 0.06418658798512185}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.981880209815326, "res": {"Yes": 0.981880209815326, " Yes": 0.011971864585374291}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9844095619816184, "res": {"Yes": 0.9844095619816184, "yes": 0.009364418855033408}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9296761586329471, "res": {"Yes": 0.9296761586329471, "yes": 0.06518070431103523}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8768056490550546, "res": {"Yes": 0.8768056490550546, "yes": 0.110215184651482}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8810928561665997, "res": {"Yes": 0.8810928561665997, "yes": 0.10539043463308102}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8408278893103615, "res": {"Yes": 0.8408278893103615, "yes": 0.15425353486383436}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7558982513664627, "res": {"Yes": 0.7558982513664627, "yes": 0.22301498778105028}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.836019450516522, "res": {"Yes": 0.836019450516522, "yes": 0.14441119901863103}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8673447809753547, "res": {"Yes": 0.8673447809753547, "yes": 0.1185083922783841}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8428906118141297, "res": {"Yes": 0.8428906118141297, "yes": 0.1256206032231488}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8779421990773566, "res": {"Yes": 0.8779421990773566, "yes": 0.10415995839036712}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5654194408910078, "res": {"Yes": 0.5654194408910078, "yes": 0.42534752428315453}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7590190068021382, "res": {"Yes": 0.7590190068021382, "yes": 0.23025188894345577}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7075348873064368, "res": {"Yes": 0.7075348873064368, "yes": 0.2831760754710607}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8736341619149006, "res": {"Yes": 0.8736341619149006, "yes": 0.12082190223786232}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6359151223325191, "res": {"Yes": 0.6359151223325191, "yes": 0.35301744641435584}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9507553548042895, "res": {"Yes": 0.9507553548042895, "yes": 0.046854126275146}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8166400961232876, "res": {"Yes": 0.8166400961232876, "yes": 0.17933864302153377}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8776474531334163, "res": {"Yes": 0.8776474531334163, "yes": 0.1180166574085503}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9106137557016704, "res": {"Yes": 0.9106137557016704, "yes": 0.08609976473624315}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.908856311915213, "res": {"Yes": 0.908856311915213, "yes": 0.08750056749346498}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9029514520204209, "res": {"Yes": 0.9029514520204209, "yes": 0.08814471734098353}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.84571357544939, "res": {"Yes": 0.84571357544939, "yes": 0.14797161709420087}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8824961259876628, "res": {"Yes": 0.8824961259876628, "yes": 0.11198470270571896}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8872058492809552, "res": {"Yes": 0.8872058492809552, "yes": 0.10361843717303078}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8687348943959264, "res": {"Yes": 0.8687348943959264, "yes": 0.125511360825678}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.37319352130757466, "res": {"yes": 0.5279607157671466, "Yes": 0.37319352130757466}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4312539289799175, "res": {"yes": 0.5243244536095801, "Yes": 0.4312539289799175}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6359527378244724, "res": {"Yes": 0.6359527378244724, "yes": 0.31295780682646857}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4589374925515857, "res": {"Yes": 0.4589374925515857, "yes": 0.4038996592752615}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6588748509669499, "res": {"Yes": 0.6588748509669499, "yes": 0.2863439982076378}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8365469361232204, "res": {"Yes": 0.8365469361232204, "yes": 0.15935628458815482}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9745583256040186, "res": {"Yes": 0.9745583256040186, "yes": 0.01988608774566232}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9657805601315833, "res": {"Yes": 0.9657805601315833, "yes": 0.0277752595157151}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8030029028786534, "res": {"Yes": 0.8030029028786534, "yes": 0.1902944061528272}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.958430275012491, "res": {"Yes": 0.958430275012491, "yes": 0.03461202894386945}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.892629248113228, "res": {"Yes": 0.892629248113228, "yes": 0.0985002689845705}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9179817616891682, "res": {"Yes": 0.9179817616891682, "yes": 0.05974683860182682}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8985004741911583, "res": {"Yes": 0.8985004741911583, "yes": 0.09081545487356119}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9487869101975512, "res": {"Yes": 0.9487869101975512, "yes": 0.04551532484284684}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8540166319772613, "res": {"Yes": 0.8540166319772613, "yes": 0.13431706731772616}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.659261176853602, "res": {"Yes": 0.659261176853602, "yes": 0.33214575012505393}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6492092922879339, "res": {"Yes": 0.6492092922879339, "yes": 0.3401001996282112}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6249080062995219, "res": {"Yes": 0.6249080062995219, "yes": 0.3645038737565173}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7985713661936635, "res": {"Yes": 0.7985713661936635, "yes": 0.1879015544209641}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7454648838835927, "res": {"Yes": 0.7454648838835927, "yes": 0.24715050399600294}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9072231665628363, "res": {"Yes": 0.9072231665628363, "yes": 0.08399869634177748}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8332480766898102, "res": {"Yes": 0.8332480766898102, "yes": 0.15854164114689778}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.894714380117418, "res": {"Yes": 0.894714380117418, "yes": 0.09704075830713188}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8740186455263347, "res": {"Yes": 0.8740186455263347, "yes": 0.11875621507063693}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8361864453577583, "res": {"Yes": 0.8361864453577583, "yes": 0.15800566376242778}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9013337393727274, "res": {"Yes": 0.9013337393727274, "yes": 0.08985927890695675}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8719184500773279, "res": {"Yes": 0.8719184500773279, "yes": 0.12234137329472636}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8529732942594213, "res": {"Yes": 0.8529732942594213, "yes": 0.13978303618501056}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8770665287702213, "res": {"Yes": 0.8770665287702213, "yes": 0.11237536928624767}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8701365374931038, "res": {"Yes": 0.8701365374931038, "yes": 0.12209742345656023}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8830982466132085, "res": {"Yes": 0.8830982466132085, "yes": 0.10935815036254809}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8623842172131757, "res": {"Yes": 0.8623842172131757, "yes": 0.12806418441496534}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.915391432594172, "res": {"Yes": 0.915391432594172, "yes": 0.07753050555549422}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9556833353272437, "res": {"Yes": 0.9556833353272437, "yes": 0.04224348239157519}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.806099520122833, "res": {"Yes": 0.806099520122833, "yes": 0.18958575951978035}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9172559398979351, "res": {"Yes": 0.9172559398979351, "yes": 0.07742014971746737}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9333683820458829, "res": {"Yes": 0.9333683820458829, "yes": 0.06077578757495446}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9325085393744347, "res": {"Yes": 0.9325085393744347, "yes": 0.06138547716890345}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9415178782049046, "res": {"Yes": 0.9415178782049046, "yes": 0.055840492540456865}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9610453410609701, "res": {"Yes": 0.9610453410609701, "yes": 0.03522210184840803}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9705082486766207, "res": {"Yes": 0.9705082486766207, "yes": 0.021429268595794908}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7819629797739025, "res": {"Yes": 0.7819629797739025, "yes": 0.21230664727500576}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9592946981153463, "res": {"Yes": 0.9592946981153463, "yes": 0.035018369931335995}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7525321191606834, "res": {"Yes": 0.7525321191606834, "yes": 0.2421377741873239}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9662468877915503, "res": {"Yes": 0.9662468877915503, "yes": 0.025451375252989895}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8800212887862016, "res": {"Yes": 0.8800212887862016, "yes": 0.11088904191642546}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8316422987450803, "res": {"Yes": 0.8316422987450803, "yes": 0.15342416087601257}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8939314952337091, "res": {"Yes": 0.8939314952337091, "yes": 0.09425968493520899}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8580085967690695, "res": {"Yes": 0.8580085967690695, "yes": 0.12207355574341948}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7838058026626549, "res": {"Yes": 0.7838058026626549, "yes": 0.21542639118818688}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9174143407428963, "res": {"Yes": 0.9174143407428963, "yes": 0.07475449012152484}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8729961790117949, "res": {"Yes": 0.8729961790117949, "yes": 0.11950542633266079}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.92001004915881, "res": {"Yes": 0.92001004915881, "yes": 0.07270993109423232}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9028790382178012, "res": {"Yes": 0.9028790382178012, "yes": 0.09001592318710172}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9251067062637274, "res": {"Yes": 0.9251067062637274, "yes": 0.06969322278624078}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8116665953484066, "res": {"Yes": 0.8116665953484066, "yes": 0.180360635847541}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6556563132873293, "res": {"Yes": 0.6556563132873293, "yes": 0.3253695713805693}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9713555750767474, "res": {"Yes": 0.9713555750767474, "yes": 0.023217870886600184}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.590451960849236, "res": {"Yes": 0.590451960849236, "yes": 0.40502142987655965}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5779523229255016, "res": {"Yes": 0.5779523229255016, "yes": 0.4093307582021666}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8260048283650293, "res": {"Yes": 0.8260048283650293, "yes": 0.1615312752935059}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9665419154369446, "res": {"Yes": 0.9665419154369446, "yes": 0.02999175405306783}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9620151211392454, "res": {"Yes": 0.9620151211392454, "yes": 0.03549322307571452}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9438683493466672, "res": {"Yes": 0.9438683493466672, "yes": 0.05022706771948215}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9306662048401847, "res": {"Yes": 0.9306662048401847, "yes": 0.059807703814112365}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9524222648668905, "res": {"Yes": 0.9524222648668905, "yes": 0.0384953740289426}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9225252058740764, "res": {"Yes": 0.9225252058740764, "yes": 0.06985832614675026}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9326441546265765, "res": {"Yes": 0.9326441546265765, "yes": 0.05968868544976158}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9398589247823472, "res": {"Yes": 0.9398589247823472, "yes": 0.05512999704395951}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.895927226257984, "res": {"Yes": 0.895927226257984, "yes": 0.0944965077681196}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8349504142719805, "res": {"Yes": 0.8349504142719805, "yes": 0.1603350805824862}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7385632751829794, "res": {"Yes": 0.7385632751829794, "yes": 0.24412367923490774}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.838205778144211, "res": {"Yes": 0.838205778144211, "yes": 0.15268327518985597}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8804764742751868, "res": {"Yes": 0.8804764742751868, "yes": 0.11108721116371562}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9286803212413972, "res": {"Yes": 0.9286803212413972, "yes": 0.0595045796788564}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8219897282913149, "res": {"Yes": 0.8219897282913149, "yes": 0.16732909114549954}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9240494328648301, "res": {"Yes": 0.9240494328648301, "yes": 0.0690494475419181}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8041162586064512, "res": {"Yes": 0.8041162586064512, "yes": 0.18838344354015998}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9144513783803768, "res": {"Yes": 0.9144513783803768, "yes": 0.07585633929117283}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8627791933691272, "res": {"Yes": 0.8627791933691272, "yes": 0.12938191074534086}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9215290976256758, "res": {"Yes": 0.9215290976256758, "yes": 0.06675933108073566}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9892803137718457, "res": {"Yes": 0.9892803137718457, "yes": 0.006067133657615565}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9140548211045809, "res": {"Yes": 0.9140548211045809, "yes": 0.07330735749679275}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.941044406270093, "res": {"Yes": 0.941044406270093, "yes": 0.0510849727685733}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9639009727608665, "res": {"Yes": 0.9639009727608665, "yes": 0.016068857451288273}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.86515873518635, "res": {"Yes": 0.86515873518635, "yes": 0.12471103971777488}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8508060199894691, "res": {"Yes": 0.8508060199894691, "yes": 0.13486829086446572}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7510056905122788, "res": {"Yes": 0.7510056905122788, "yes": 0.23523536802659678}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8416680604179827, "res": {"Yes": 0.8416680604179827, "yes": 0.14666153939785448}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7751922080431669, "res": {"Yes": 0.7751922080431669, "yes": 0.20617756818596084}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8989840469465024, "res": {"Yes": 0.8989840469465024, "yes": 0.09848313142877778}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8808929060231647, "res": {"Yes": 0.8808929060231647, "yes": 0.11481096336056311}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.80085417673822, "res": {"Yes": 0.80085417673822, "yes": 0.19499936048263475}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8621495254175899, "res": {"Yes": 0.8621495254175899, "yes": 0.13468689373037523}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8570218112569454, "res": {"Yes": 0.8570218112569454, "yes": 0.139700043686882}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.794850247496661, "res": {"Yes": 0.794850247496661, "yes": 0.18286141648400356}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5496889314157408, "res": {"Yes": 0.5496889314157408, "yes": 0.4432660876197716}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6252412898432312, "res": {"Yes": 0.6252412898432312, "yes": 0.3639717588253358}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7299167298825975, "res": {"Yes": 0.7299167298825975, "yes": 0.24515012524346397}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6297457263731966, "res": {"Yes": 0.6297457263731966, "yes": 0.36005301301443526}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.929908960092744, "res": {"Yes": 0.929908960092744, "yes": 0.06180924068803409}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9017371404139314, "res": {"Yes": 0.9017371404139314, "yes": 0.08252270593945486}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9241555198289982, "res": {"Yes": 0.9241555198289982, "yes": 0.07024409017007001}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.980227452362278, "res": {"Yes": 0.980227452362278, "yes": 0.014655326880772057}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.950317836124634, "res": {"Yes": 0.950317836124634, "yes": 0.04352691485171621}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8429966625529839, "res": {"Yes": 0.8429966625529839, "yes": 0.13360307522410592}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9140736416871047, "res": {"Yes": 0.9140736416871047, "yes": 0.07557514648842018}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6520360481566647, "res": {"Yes": 0.6520360481566647, "yes": 0.33159653669516853}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8518071479037486, "res": {"Yes": 0.8518071479037486, "yes": 0.13879216231152494}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7714117818426317, "res": {"Yes": 0.7714117818426317, "yes": 0.20391741408939548}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8680973120057573, "res": {"Yes": 0.8680973120057573, "yes": 0.1264094294465507}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8192244737172659, "res": {"Yes": 0.8192244737172659, "yes": 0.17712765352441634}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9386582378273225, "res": {"Yes": 0.9386582378273225, "yes": 0.046662800262088244}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8585719527364835, "res": {"Yes": 0.8585719527364835, "yes": 0.13369061390416567}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820494082432919, "res": {"Yes": 0.9820494082432919, "yes": 0.0108744312742021}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7735519337359033, "res": {"Yes": 0.7735519337359033, "yes": 0.22312078887850162}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8371795963537324, "res": {"Yes": 0.8371795963537324, "yes": 0.15997917955450636}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8625974076911122, "res": {"Yes": 0.8625974076911122, "yes": 0.13310553089655405}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8949268642213598, "res": {"Yes": 0.8949268642213598, "yes": 0.0986442534609376}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6761017341213137, "res": {"Yes": 0.6761017341213137, "yes": 0.31815639783918415}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.625305249045907, "res": {"Yes": 0.625305249045907, "yes": 0.3656515676418724}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9561692613320101, "res": {"Yes": 0.9561692613320101, "yes": 0.03778339343520511}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5385184290700377, "res": {"Yes": 0.5385184290700377, "yes": 0.4574956053980584}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7559135811384438, "res": {"Yes": 0.7559135811384438, "yes": 0.23953234956496594}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9725805200017934, "res": {"Yes": 0.9725805200017934, "yes": 0.020872271987887155}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9670569227077102, "res": {"Yes": 0.9670569227077102, "yes": 0.029113415421195034}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9537117900911535, "res": {"Yes": 0.9537117900911535, "yes": 0.034981270125579604}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9474614176622745, "res": {"Yes": 0.9474614176622745, "yes": 0.04775263644842645}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9619053576282097, "res": {"Yes": 0.9619053576282097, "yes": 0.03690821957615633}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9507886555937682, "res": {"Yes": 0.9507886555937682, "yes": 0.03816816453376029}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7589047982152641, "res": {"Yes": 0.7589047982152641, "yes": 0.233626638466093}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8692076755912904, "res": {"Yes": 0.8692076755912904, "yes": 0.12429937607342402}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9067930361189985, "res": {"Yes": 0.9067930361189985, "yes": 0.08536330110477997}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8632953241559747, "res": {"Yes": 0.8632953241559747, "yes": 0.12368570105500097}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8700745337726609, "res": {"Yes": 0.8700745337726609, "yes": 0.11947432311844536}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6839536204624417, "res": {"Yes": 0.6839536204624417, "yes": 0.2999726725425063}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8391133502762416, "res": {"Yes": 0.8391133502762416, "yes": 0.15055080153852277}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8028433540243238, "res": {"Yes": 0.8028433540243238, "yes": 0.1832800624685971}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8474794820424568, "res": {"Yes": 0.8474794820424568, "yes": 0.14432447726086822}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7961821083257835, "res": {"Yes": 0.7961821083257835, "yes": 0.18732545009226817}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8485601814093531, "res": {"Yes": 0.8485601814093531, "yes": 0.1319474159208318}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7803360724324758, "res": {"Yes": 0.7803360724324758, "yes": 0.204175083658232}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7483373874206107, "res": {"Yes": 0.7483373874206107, "yes": 0.22885945441959551}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8594566342988965, "res": {"Yes": 0.8594566342988965, "yes": 0.13012423903734147}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7621418162862847, "res": {"Yes": 0.7621418162862847, "yes": 0.21912829473145864}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8362382236254935, "res": {"Yes": 0.8362382236254935, "yes": 0.14773016186025933}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6240038130339807, "res": {"Yes": 0.6240038130339807, "yes": 0.3697586808673082}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7572286224336406, "res": {"Yes": 0.7572286224336406, "yes": 0.2335997963074448}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6887645179802233, "res": {"Yes": 0.6887645179802233, "yes": 0.3033500074605385}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9724754383537455, "res": {"Yes": 0.9724754383537455, "yes": 0.01857583939309245}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7387674644498575, "res": {"Yes": 0.7387674644498575, "yes": 0.25548780510699304}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8337129004114225, "res": {"Yes": 0.8337129004114225, "yes": 0.1608688085336141}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9724873998752005, "res": {"Yes": 0.9724873998752005, "yes": 0.022864531772400214}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9767560617751752, "res": {"Yes": 0.9767560617751752, "yes": 0.020313205239739927}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9804382562761286, "res": {"Yes": 0.9804382562761286, "yes": 0.015122251660090137}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9182233199748829, "res": {"Yes": 0.9182233199748829, "yes": 0.07703387644874345}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9812897216916248, "res": {"Yes": 0.9812897216916248, "yes": 0.008965518836820886}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8041409292717113, "res": {"Yes": 0.8041409292717113, "yes": 0.18776898029476644}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9008371151629141, "res": {"Yes": 0.9008371151629141, "yes": 0.09326766513074403}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9434739750186635, "res": {"Yes": 0.9434739750186635, "yes": 0.0517870997659242}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7338252923652033, "res": {"Yes": 0.7338252923652033, "yes": 0.25798266145024734}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7545235790775894, "res": {"Yes": 0.7545235790775894, "yes": 0.23545044789521244}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7531930044608787, "res": {"Yes": 0.7531930044608787, "yes": 0.22462943276617645}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8620999187610946, "res": {"Yes": 0.8620999187610946, "yes": 0.129406456821931}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8269132968689541, "res": {"Yes": 0.8269132968689541, "yes": 0.16398598698860828}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7389372305529961, "res": {"Yes": 0.7389372305529961, "yes": 0.2545642803077809}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8013202172127883, "res": {"Yes": 0.8013202172127883, "yes": 0.18976783026121563}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9590005745358243, "res": {"Yes": 0.9590005745358243, "yes": 0.03108627113083199}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8542456159889185, "res": {"Yes": 0.8542456159889185, "yes": 0.1387583427826466}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8487144636741661, "res": {"Yes": 0.8487144636741661, "yes": 0.14605133638528597}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8967090127986359, "res": {"Yes": 0.8967090127986359, "yes": 0.0968612051403004}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9763946351491362, "res": {"Yes": 0.9763946351491362, "yes": 0.023185910287370402}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.908898165712053, "res": {"Yes": 0.908898165712053, "yes": 0.08437116353820728}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9357773357131691, "res": {"Yes": 0.9357773357131691, "yes": 0.05704828297339378}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8887331475620197, "res": {"Yes": 0.8887331475620197, "yes": 0.10087308867815299}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8053925908089566, "res": {"Yes": 0.8053925908089566, "yes": 0.1857231973515473}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8921892100553745, "res": {"Yes": 0.8921892100553745, "yes": 0.101028734763011}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8039790800339213, "res": {"Yes": 0.8039790800339213, "yes": 0.18650362557626707}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.874434366448893, "res": {"Yes": 0.874434366448893, "yes": 0.11664528516232583}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8284862487668289, "res": {"Yes": 0.8284862487668289, "yes": 0.1619254070658995}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.884405067625498, "res": {"Yes": 0.884405067625498, "yes": 0.10423476167192078}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9402880100282133, "res": {"Yes": 0.9402880100282133, "yes": 0.052962852891873406}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8591196730466333, "res": {"Yes": 0.8591196730466333, "yes": 0.13215420744967038}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9431427087151129, "res": {"Yes": 0.9431427087151129, "yes": 0.04978097978172718}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8956518157719174, "res": {"Yes": 0.8956518157719174, "yes": 0.09448549011765532}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4126728213906513, "res": {"yes": 0.5485226915122308, "Yes": 0.4126728213906513}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.593989165312793, "res": {"Yes": 0.593989165312793, "yes": 0.39558021227970686}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5091019132389905, "res": {"Yes": 0.5091019132389905, "yes": 0.4699105883164873}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5300736393063895, "res": {"Yes": 0.5300736393063895, "yes": 0.45993463381425903}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5594958883783806, "res": {"Yes": 0.5594958883783806, "yes": 0.422962058574568}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7889858203269293, "res": {"Yes": 0.7889858203269293, "yes": 0.20856240040528667}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8069417856945305, "res": {"Yes": 0.8069417856945305, "yes": 0.18589544257882046}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7534645568835071, "res": {"Yes": 0.7534645568835071, "yes": 0.24403603016002776}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7816202501533076, "res": {"Yes": 0.7816202501533076, "yes": 0.20797150580231774}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7893387573714886, "res": {"Yes": 0.7893387573714886, "yes": 0.20800717597419266}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8668434185099108, "res": {"Yes": 0.8668434185099108, "yes": 0.12280275852042744}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8401901279154053, "res": {"Yes": 0.8401901279154053, "yes": 0.15540086108133652}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824348184342948, "res": {"Yes": 0.9824348184342948, "yes": 0.012198186327188906}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8943237615357766, "res": {"Yes": 0.8943237615357766, "yes": 0.0985360310726382}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8238739780737072, "res": {"Yes": 0.8238739780737072, "yes": 0.1728754475734729}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9539031925474092, "res": {"Yes": 0.9539031925474092, "yes": 0.041350635038015546}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9502243969160931, "res": {"Yes": 0.9502243969160931, "yes": 0.043901490821067576}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9343756936831408, "res": {"Yes": 0.9343756936831408, "yes": 0.059458243661508564}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9710253450725518, "res": {"Yes": 0.9710253450725518, "yes": 0.024410034041654857}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9501385052638802, "res": {"Yes": 0.9501385052638802, "yes": 0.04370441291247921}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9030887923505394, "res": {"Yes": 0.9030887923505394, "yes": 0.08948776204303555}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9193520229236808, "res": {"Yes": 0.9193520229236808, "yes": 0.07479402327011903}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.85427745431633, "res": {"Yes": 0.85427745431633, "yes": 0.13762732367176153}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8351938962585662, "res": {"Yes": 0.8351938962585662, "yes": 0.15496783495759672}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.93713123861081, "res": {"Yes": 0.93713123861081, "yes": 0.057376584180126915}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8198033127462687, "res": {"Yes": 0.8198033127462687, "yes": 0.17607713727306445}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7461542281490467, "res": {"Yes": 0.7461542281490467, "yes": 0.2486351793653936}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.822847461462908, "res": {"Yes": 0.822847461462908, "yes": 0.17404494648369762}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8356577580829274, "res": {"Yes": 0.8356577580829274, "yes": 0.1586041030004}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8625337244753971, "res": {"Yes": 0.8625337244753971, "yes": 0.13169433310159667}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9321676299575749, "res": {"Yes": 0.9321676299575749, "yes": 0.05776162217758227}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8178905772256634, "res": {"Yes": 0.8178905772256634, "yes": 0.16692264123925288}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7582331916137972, "res": {"Yes": 0.7582331916137972, "yes": 0.22271933224993326}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8729405796557339, "res": {"Yes": 0.8729405796557339, "yes": 0.10953936462893628}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8617186618118459, "res": {"Yes": 0.8617186618118459, "yes": 0.11443926882231384}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7194085592711271, "res": {"Yes": 0.7194085592711271, "yes": 0.2609444055249244}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.857503841558906, "res": {"Yes": 0.857503841558906, "yes": 0.1278512372417523}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7748500594688046, "res": {"Yes": 0.7748500594688046, "yes": 0.21829341080699444}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7770747732310629, "res": {"Yes": 0.7770747732310629, "yes": 0.20408171532993122}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8309704037363299, "res": {"Yes": 0.8309704037363299, "yes": 0.16191434793827042}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7357002737127616, "res": {"Yes": 0.7357002737127616, "yes": 0.25748165606460316}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8407724385395707, "res": {"Yes": 0.8407724385395707, "yes": 0.1514141774537523}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5383600148676849, "res": {"Yes": 0.5383600148676849, "yes": 0.45616224149262347}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8436727133360237, "res": {"Yes": 0.8436727133360237, "yes": 0.15325772463308657}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8376233270038788, "res": {"Yes": 0.8376233270038788, "yes": 0.1557964393166761}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8169739659925841, "res": {"Yes": 0.8169739659925841, "yes": 0.15912734267213363}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8877787787993098, "res": {"Yes": 0.8877787787993098, "yes": 0.1013526762919614}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8901910343071705, "res": {"Yes": 0.8901910343071705, "yes": 0.0963965949581853}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9055799606822033, "res": {"Yes": 0.9055799606822033, "yes": 0.08487261446066582}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8895101028882955, "res": {"Yes": 0.8895101028882955, "yes": 0.09011208451487825}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.808086574130235, "res": {"Yes": 0.808086574130235, "yes": 0.18180741708930367}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7941750071455969, "res": {"Yes": 0.7941750071455969, "yes": 0.19497949106016585}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9548068483442327, "res": {"Yes": 0.9548068483442327, "yes": 0.02681522113207626}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9714470208520677, "res": {"Yes": 0.9714470208520677, " Yes": 0.01638717929799847}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9560375141098041, "res": {"Yes": 0.9560375141098041, "yes": 0.02768676992283285}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9710822099817475, "res": {"Yes": 0.9710822099817475, "yes": 0.023064929927998763}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9669349777495764, "res": {"Yes": 0.9669349777495764, "yes": 0.024957542446596405}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9789302550506108, "res": {"Yes": 0.9789302550506108, "yes": 0.01318215840049861}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.778335378443629, "res": {"Yes": 0.778335378443629, "yes": 0.21460782205192794}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6885420623098422, "res": {"Yes": 0.6885420623098422, "yes": 0.2999607038716484}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8519356101074591, "res": {"Yes": 0.8519356101074591, "yes": 0.14302826656061166}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6653845598368121, "res": {"Yes": 0.6653845598368121, "yes": 0.3274739398663825}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8779819883194362, "res": {"Yes": 0.8779819883194362, "yes": 0.1129058266739639}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7804420726800382, "res": {"Yes": 0.7804420726800382, "yes": 0.21239422059271895}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6998612695061792, "res": {"Yes": 0.6998612695061792, "yes": 0.289482104233197}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8131738131274124, "res": {"Yes": 0.8131738131274124, "yes": 0.1811272239282193}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8838501573255305, "res": {"Yes": 0.8838501573255305, "yes": 0.10869950920484363}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.898653707643524, "res": {"Yes": 0.898653707643524, "yes": 0.09367223047421769}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.91565122092411, "res": {"Yes": 0.91565122092411, "yes": 0.07851382147083953}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7605437507569804, "res": {"Yes": 0.7605437507569804, "yes": 0.22986447105988805}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9769483131230099, "res": {"Yes": 0.9769483131230099, "yes": 0.016317828433050602}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9769528676666623, "res": {"Yes": 0.9769528676666623, "yes": 0.019151280476065468}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8079157465255548, "res": {"Yes": 0.8079157465255548, "yes": 0.1882008001962686}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9767654992378363, "res": {"Yes": 0.9767654992378363, "yes": 0.0211675400584276}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.785072357726385, "res": {"Yes": 0.785072357726385, "yes": 0.20812467079800648}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.814327825520924, "res": {"Yes": 0.814327825520924, "yes": 0.17711475910060245}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9169616671986653, "res": {"Yes": 0.9169616671986653, "yes": 0.07644811527134274}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9510654685577297, "res": {"Yes": 0.9510654685577297, "yes": 0.04363529664774641}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8510704235116042, "res": {"Yes": 0.8510704235116042, "yes": 0.1384048490394663}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9182548890352927, "res": {"Yes": 0.9182548890352927, "yes": 0.0753304758720008}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6119751695032581, "res": {"Yes": 0.6119751695032581, "yes": 0.3847540038305788}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7290508983504298, "res": {"Yes": 0.7290508983504298, "yes": 0.26839014289163715}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6056804150042563, "res": {"Yes": 0.6056804150042563, "yes": 0.377427919593075}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6562814975246812, "res": {"Yes": 0.6562814975246812, "yes": 0.3366046441781185}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7804020760487358, "res": {"Yes": 0.7804020760487358, "yes": 0.21639777801833068}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6501946495992522, "res": {"Yes": 0.6501946495992522, "yes": 0.33738829541585263}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8031253781904889, "res": {"Yes": 0.8031253781904889, "yes": 0.18328534101040686}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8651055555133921, "res": {"Yes": 0.8651055555133921, "yes": 0.1261919018638641}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8990987061883075, "res": {"Yes": 0.8990987061883075, "yes": 0.08993668106294214}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7915807368492327, "res": {"Yes": 0.7915807368492327, "yes": 0.20125077736654856}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7969901333540356, "res": {"Yes": 0.7969901333540356, "yes": 0.19619198183298647}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8099513215284493, "res": {"Yes": 0.8099513215284493, "yes": 0.18354799363312813}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8459007779510448, "res": {"Yes": 0.8459007779510448, "yes": 0.14891559649737346}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7985370842607799, "res": {"Yes": 0.7985370842607799, "yes": 0.19084601123677608}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8361077722763478, "res": {"Yes": 0.8361077722763478, "yes": 0.15849394314360118}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8544783014669398, "res": {"Yes": 0.8544783014669398, "yes": 0.13748715285809854}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9197289094520383, "res": {"Yes": 0.9197289094520383, "yes": 0.07109211844537024}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9053608368467716, "res": {"Yes": 0.9053608368467716, "yes": 0.0887392841400915}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9344276370717057, "res": {"Yes": 0.9344276370717057, "yes": 0.058912122179577434}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9170998087087694, "res": {"Yes": 0.9170998087087694, "yes": 0.07566816111086692}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8393428120167591, "res": {"Yes": 0.8393428120167591, "yes": 0.15542902226869101}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9851052546561225, "res": {"Yes": 0.9851052546561225, "yes": 0.00994977142935117}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8895796199296343, "res": {"Yes": 0.8895796199296343, "yes": 0.10487232721023541}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947152709742042, "res": {"Yes": 0.9947152709742042, "yes": 0.001961652595560523}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7597142524649997, "res": {"Yes": 0.7597142524649997, "yes": 0.23303134653043495}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7647905679670959, "res": {"Yes": 0.7647905679670959, "yes": 0.21171302012847726}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7161848463837468, "res": {"Yes": 0.7161848463837468, "yes": 0.2451501742734939}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7547655935943531, "res": {"Yes": 0.7547655935943531, "yes": 0.19461985200873008}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8815326664778824, "res": {"Yes": 0.8815326664778824, "yes": 0.09093859322230573}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8083703997250725, "res": {"Yes": 0.8083703997250725, "yes": 0.17498381413382594}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9124841806041367, "res": {"Yes": 0.9124841806041367, "yes": 0.07251501029494974}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9268535255652592, "res": {"Yes": 0.9268535255652592, "yes": 0.06377887859164506}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9103210586421335, "res": {"Yes": 0.9103210586421335, "yes": 0.07763302984659076}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9227972986754003, "res": {"Yes": 0.9227972986754003, "yes": 0.06497288824640436}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9119569399404907, "res": {"Yes": 0.9119569399404907, "yes": 0.07470542990453831}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5800073112022501, "res": {"Yes": 0.5800073112022501, "yes": 0.2347026699917619}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5669928644941357, "res": {"Yes": 0.5669928644941357, "yes": 0.38793941440190516}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6458026352942888, "res": {"Yes": 0.6458026352942888, "yes": 0.3001417247637557}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9599534694671327, "res": {"Yes": 0.9599534694671327, "yes": 0.03332745990625649}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5926016022993338, "res": {"Yes": 0.5926016022993338, "yes": 0.36035670171168915}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6560248038538739, "res": {"Yes": 0.6560248038538739, "yes": 0.2549985806289965}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7033469015942347, "res": {"Yes": 0.7033469015942347, "yes": 0.26706118220414743}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6775735305095827, "res": {"Yes": 0.6775735305095827, "yes": 0.17854707268633369}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5906927135648876, "res": {"Yes": 0.5906927135648876, "yes": 0.3167064688021337}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7616768306596946, "res": {"Yes": 0.7616768306596946, "yes": 0.1768800428815447}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8701767039227085, "res": {"Yes": 0.8701767039227085, "yes": 0.11501902839525847}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7479009721094707, "res": {"Yes": 0.7479009721094707, "yes": 0.24482828297231388}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8743120591240879, "res": {"Yes": 0.8743120591240879, "yes": 0.11911588931196147}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8320791753569234, "res": {"Yes": 0.8320791753569234, "yes": 0.1562010217975994}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8070872823444747, "res": {"Yes": 0.8070872823444747, "yes": 0.18309753316555363}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9238692145993075, "res": {"Yes": 0.9238692145993075, "yes": 0.07167920509581996}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7826352563026069, "res": {"Yes": 0.7826352563026069, "yes": 0.20976036896118944}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7121907258905942, "res": {"Yes": 0.7121907258905942, "yes": 0.2809172884865311}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7961109248657822, "res": {"Yes": 0.7961109248657822, "yes": 0.19903380386805547}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827437238642251, "res": {"Yes": 0.9827437238642251, "yes": 0.01384838998051787}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7616596931238027, "res": {"Yes": 0.7616596931238027, "yes": 0.22550180852613785}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7358389515689416, "res": {"Yes": 0.7358389515689416, "yes": 0.26071675260996613}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8631691023419783, "res": {"Yes": 0.8631691023419783, "yes": 0.1319218733737079}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8262046465779547, "res": {"Yes": 0.8262046465779547, "yes": 0.16964222813013424}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8542588398134061, "res": {"Yes": 0.8542588398134061, "yes": 0.1420792895645906}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8975595678128785, "res": {"Yes": 0.8975595678128785, "yes": 0.09861001998212526}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7413315928921153, "res": {"Yes": 0.7413315928921153, "yes": 0.2528368651518573}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8767248200681377, "res": {"Yes": 0.8767248200681377, "yes": 0.11831145294049845}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9128823660563093, "res": {"Yes": 0.9128823660563093, "yes": 0.07969705061149572}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9741231918487888, "res": {"Yes": 0.9741231918487888, "yes": 0.020184146690647036}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7811579803662024, "res": {"Yes": 0.7811579803662024, "yes": 0.21346672783078569}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8635116239931685, "res": {"Yes": 0.8635116239931685, "yes": 0.13254270282592368}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9522460392418923, "res": {"Yes": 0.9522460392418923, "yes": 0.04020785115753314}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.873585851281548, "res": {"Yes": 0.873585851281548, "yes": 0.12017346960540856}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8510717171396311, "res": {"Yes": 0.8510717171396311, "yes": 0.144544609906132}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8574825414280305, "res": {"Yes": 0.8574825414280305, "yes": 0.13408974022643233}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9792473672274344, "res": {"Yes": 0.9792473672274344, "yes": 0.015766166432204593}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8877183719475218, "res": {"Yes": 0.8877183719475218, "yes": 0.10135735889377336}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9011392255034779, "res": {"Yes": 0.9011392255034779, "yes": 0.09272160221086642}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8246827352040084, "res": {"Yes": 0.8246827352040084, "yes": 0.17000867734987124}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8044566326251558, "res": {"Yes": 0.8044566326251558, "yes": 0.18943028714615817}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.876153811109836, "res": {"Yes": 0.876153811109836, "yes": 0.12062250541913021}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8913256768025082, "res": {"Yes": 0.8913256768025082, "yes": 0.10293990578498312}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9068988106273809, "res": {"Yes": 0.9068988106273809, "yes": 0.08643118766159626}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9272651723565114, "res": {"Yes": 0.9272651723565114, "yes": 0.06903086193237509}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7406194710021939, "res": {"Yes": 0.7406194710021939, "yes": 0.25093558656897264}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6381480942019718, "res": {"Yes": 0.6381480942019718, "yes": 0.3510164466715168}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6240540036790527, "res": {"Yes": 0.6240540036790527, "yes": 0.3488288423996512}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48951270612207026, "res": {"yes": 0.5034083747734782, "Yes": 0.48951270612207026}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6711130810865085, "res": {"Yes": 0.6711130810865085, "yes": 0.3222207530030156}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.49839432400835315, "res": {"Yes": 0.49839432400835315, "yes": 0.4559913677590012}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6083042445226353, "res": {"Yes": 0.6083042445226353, "\u064a": 0.142173960105893}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8744498703076515, "res": {"Yes": 0.8744498703076515, "yes": 0.12270943933851541}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8646448105680274, "res": {"Yes": 0.8646448105680274, "yes": 0.1311636629572137}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9397913250407917, "res": {"Yes": 0.9397913250407917, "yes": 0.056633392173831905}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9763300849044512, "res": {"Yes": 0.9763300849044512, "yes": 0.017122737848453893}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9662042627503042, "res": {"Yes": 0.9662042627503042, "yes": 0.027099703286339}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8085297452338424, "res": {"Yes": 0.8085297452338424, "yes": 0.18679315450083897}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7165472559613056, "res": {"Yes": 0.7165472559613056, "yes": 0.27646258047473365}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9735136263402456, "res": {"Yes": 0.9735136263402456, "yes": 0.023856167492603882}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8908287462225339, "res": {"Yes": 0.8908287462225339, "yes": 0.10380588648329071}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8946170494344035, "res": {"Yes": 0.8946170494344035, "yes": 0.10072324215304262}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8312998075084295, "res": {"Yes": 0.8312998075084295, "yes": 0.16582275533677343}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9234818010814355, "res": {"Yes": 0.9234818010814355, "yes": 0.07317750602353733}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.789938890745274, "res": {"Yes": 0.789938890745274, "yes": 0.19851647198609987}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9149801801312162, "res": {"Yes": 0.9149801801312162, "yes": 0.07797515723583139}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9345191500510641, "res": {"Yes": 0.9345191500510641, "yes": 0.05874849166938734}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8947222089024949, "res": {"Yes": 0.8947222089024949, "yes": 0.09865488788464585}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9135919238851417, "res": {"Yes": 0.9135919238851417, "yes": 0.07700910633732494}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8892144112852322, "res": {"Yes": 0.8892144112852322, "yes": 0.0844875662851727}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8198077232999557, "res": {"Yes": 0.8198077232999557, "yes": 0.17405378819155648}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8103614226752502, "res": {"Yes": 0.8103614226752502, "yes": 0.18301623760409036}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8572148428745681, "res": {"Yes": 0.8572148428745681, "yes": 0.1403920237361942}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9651932621855301, "res": {"Yes": 0.9651932621855301, "yes": 0.029431383735091812}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8445520166660093, "res": {"Yes": 0.8445520166660093, "yes": 0.14598808064854568}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6541088526590978, "res": {"Yes": 0.6541088526590978, "yes": 0.33692446945368826}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6812217511951411, "res": {"Yes": 0.6812217511951411, "yes": 0.2991032342893455}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6256532156434705, "res": {"Yes": 0.6256532156434705, "yes": 0.36487589387473723}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.781475054230901, "res": {"Yes": 0.781475054230901, "yes": 0.20988093190361073}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7098725727885752, "res": {"Yes": 0.7098725727885752, "yes": 0.28235862390914906}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9840200825919369, "res": {"Yes": 0.9840200825919369, "yes": 0.009727603628636373}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9681792918530482, "res": {"Yes": 0.9681792918530482, "yes": 0.020204324769309612}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5283849969998641, "res": {"Yes": 0.5283849969998641, "yes": 0.4679200785391464}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5704556793941071, "res": {"Yes": 0.5704556793941071, "yes": 0.4246205637540762}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9518619121254263, "res": {"Yes": 0.9518619121254263, "yes": 0.040444888878009434}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.984782843074046, "res": {"Yes": 0.984782843074046, "yes": 0.010945317935057689}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9232965101205377, "res": {"Yes": 0.9232965101205377, "yes": 0.053720188413942244}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8904817015282867, "res": {"Yes": 0.8904817015282867, "yes": 0.10424102636934955}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9352656896554202, "res": {"Yes": 0.9352656896554202, "yes": 0.06104977279829487}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8668412600724859, "res": {"Yes": 0.8668412600724859, "yes": 0.12852683034277204}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8915395849755899, "res": {"Yes": 0.8915395849755899, "yes": 0.09990575373911706}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6970220173382068, "res": {"Yes": 0.6970220173382068, "yes": 0.2951821664014206}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9849991960532255, "res": {"Yes": 0.9849991960532255, "yes": 0.008947769460919752}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6444055597005359, "res": {"Yes": 0.6444055597005359, "yes": 0.3364580847712062}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6502284345909719, "res": {"Yes": 0.6502284345909719, "yes": 0.32645312784652697}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9216110030854172, "res": {"Yes": 0.9216110030854172, "yes": 0.07716943654627632}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8383449318528212, "res": {"Yes": 0.8383449318528212, "yes": 0.15406065353273649}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9728058842614804, "res": {"Yes": 0.9728058842614804, "yes": 0.024276130386953195}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.780962512229984, "res": {"Yes": 0.780962512229984, "yes": 0.2113032444012024}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8460042548627227, "res": {"Yes": 0.8460042548627227, "yes": 0.14873978636593005}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8096827052275747, "res": {"Yes": 0.8096827052275747, "yes": 0.18376520426738746}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8498373846302449, "res": {"Yes": 0.8498373846302449, "yes": 0.14430345071635228}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7878256735955612, "res": {"Yes": 0.7878256735955612, "yes": 0.2078654258033662}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8867437708943426, "res": {"Yes": 0.8867437708943426, "yes": 0.10315992757850674}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8462513241755067, "res": {"Yes": 0.8462513241755067, "yes": 0.14361246823329962}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8777020971648994, "res": {"Yes": 0.8777020971648994, "yes": 0.11619974837664715}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8228435940889275, "res": {"Yes": 0.8228435940889275, "yes": 0.16532029581372293}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8996736072909126, "res": {"Yes": 0.8996736072909126, "yes": 0.09498130464105325}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9063366713812581, "res": {"Yes": 0.9063366713812581, "yes": 0.08233522134248045}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6036076730653546, "res": {"Yes": 0.6036076730653546, "yes": 0.39084095752243186}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7862916338004518, "res": {"Yes": 0.7862916338004518, "yes": 0.2072510538620348}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6803131038138522, "res": {"Yes": 0.6803131038138522, "yes": 0.3026235969502021}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6804641773069816, "res": {"Yes": 0.6804641773069816, "yes": 0.31307875708911215}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8285098112508007, "res": {"Yes": 0.8285098112508007, "yes": 0.16197096309006154}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8708742124087471, "res": {"Yes": 0.8708742124087471, "yes": 0.11763307888906455}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8354495046969179, "res": {"Yes": 0.8354495046969179, "yes": 0.1579759140969602}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7621533094715327, "res": {"Yes": 0.7621533094715327, "yes": 0.22868700516714294}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8769244204238285, "res": {"Yes": 0.8769244204238285, "yes": 0.11820530422700853}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9214940672843845, "res": {"Yes": 0.9214940672843845, "yes": 0.0720701736922092}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8226602520511492, "res": {"Yes": 0.8226602520511492, "yes": 0.1706441644891379}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9854230240141146, "res": {"Yes": 0.9854230240141146, "yes": 0.010222030856544264}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.864512305258501, "res": {"Yes": 0.864512305258501, "yes": 0.12117142505708886}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9751922323825967, "res": {"Yes": 0.9751922323825967, "yes": 0.02162781632417626}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9352446183568014, "res": {"Yes": 0.9352446183568014, "yes": 0.0612887301753393}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9271888189223717, "res": {"Yes": 0.9271888189223717, "yes": 0.06797624316968118}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6719636313086133, "res": {"Yes": 0.6719636313086133, "yes": 0.31342120193260925}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6879260378174394, "res": {"Yes": 0.6879260378174394, "yes": 0.30616243026629913}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7895029490141523, "res": {"Yes": 0.7895029490141523, "yes": 0.20226612462829882}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7407203872798833, "res": {"Yes": 0.7407203872798833, "yes": 0.24826575843125054}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8974254375586017, "res": {"Yes": 0.8974254375586017, "yes": 0.09014852419643288}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7007037302604073, "res": {"Yes": 0.7007037302604073, "yes": 0.2907885233167371}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7041400274322267, "res": {"Yes": 0.7041400274322267, "yes": 0.2890459809188199}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7880299438788322, "res": {"Yes": 0.7880299438788322, "yes": 0.20809784525688388}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8051996982243829, "res": {"Yes": 0.8051996982243829, "yes": 0.1861368543023828}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6108449721950155, "res": {"Yes": 0.6108449721950155, "yes": 0.3755562570343848}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9927747028765773, "res": {"Yes": 0.9927747028765773, "yes": 0.0065476138068998775}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.981457457083741, "res": {"Yes": 0.981457457083741, "yes": 0.014876676916828438}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9800930028039111, "res": {"Yes": 0.9800930028039111, "yes": 0.016875253839431444}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8236684142104885, "res": {"Yes": 0.8236684142104885, "yes": 0.17291936351424492}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885994425377895, "res": {"Yes": 0.9885994425377895, "yes": 0.00845054789559224}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9391365872707208, "res": {"Yes": 0.9391365872707208, "yes": 0.05389677693389022}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9316072487816709, "res": {"Yes": 0.9316072487816709, "yes": 0.054749316192402885}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9460886026590423, "res": {"Yes": 0.9460886026590423, "yes": 0.05052786058883751}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9472215470108625, "res": {"Yes": 0.9472215470108625, "yes": 0.048621639012403524}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.913852087950129, "res": {"Yes": 0.913852087950129, "yes": 0.07861173424821218}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9406475405091743, "res": {"Yes": 0.9406475405091743, "yes": 0.057294239076100845}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9228385080316857, "res": {"Yes": 0.9228385080316857, "yes": 0.07331009191222393}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9459126881635311, "res": {"Yes": 0.9459126881635311, "yes": 0.05101524343675695}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9045164115296552, "res": {"Yes": 0.9045164115296552, "yes": 0.08938168353077423}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9134806003563773, "res": {"Yes": 0.9134806003563773, "yes": 0.08433215933065497}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8644962773489407, "res": {"Yes": 0.8644962773489407, "yes": 0.12947902241986203}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8519019593154674, "res": {"Yes": 0.8519019593154674, "yes": 0.14199639628181332}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8161063513286282, "res": {"Yes": 0.8161063513286282, "yes": 0.17782028029595653}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8614263819523256, "res": {"Yes": 0.8614263819523256, "yes": 0.1342724277440579}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.838830046315853, "res": {"Yes": 0.838830046315853, "yes": 0.15286460255661785}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5334181184304494, "res": {"Yes": 0.5334181184304494, "yes": 0.43223272090037246}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4920054919756478, "res": {"yes": 0.5008556215340654, "Yes": 0.4920054919756478}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8501679506635265, "res": {"Yes": 0.8501679506635265, "yes": 0.13683994784655473}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5786867886626418, "res": {"Yes": 0.5786867886626418, "yes": 0.3957113292869444}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.82823463428465, "res": {"Yes": 0.82823463428465, "yes": 0.16045513631555516}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.945807319359221, "res": {"Yes": 0.945807319359221, "yes": 0.04980625502137752}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9260235524946165, "res": {"Yes": 0.9260235524946165, "yes": 0.06913008219734458}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9251483277507104, "res": {"Yes": 0.9251483277507104, "yes": 0.07197641378166358}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9404965297881115, "res": {"Yes": 0.9404965297881115, "yes": 0.0545913409209272}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.970991842363046, "res": {"Yes": 0.970991842363046, "yes": 0.021925961228941623}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9224984345810553, "res": {"Yes": 0.9224984345810553, "yes": 0.06982431342468684}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9229395311552255, "res": {"Yes": 0.9229395311552255, "yes": 0.07082778583332068}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6256200756703578, "res": {"Yes": 0.6256200756703578, "yes": 0.3666078246386594}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8313591062385631, "res": {"Yes": 0.8313591062385631, "yes": 0.15893178404271605}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8944011954391157, "res": {"Yes": 0.8944011954391157, "yes": 0.09948278150435425}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6384642217032978, "res": {"Yes": 0.6384642217032978, "yes": 0.35023241570762276}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9794805097087055, "res": {"Yes": 0.9794805097087055, "yes": 0.015252833346088525}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880284268824014, "res": {"Yes": 0.9880284268824014, "yes": 0.009547791574261945}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9095254127163187, "res": {"Yes": 0.9095254127163187, "yes": 0.08534992572348536}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9524285384930113, "res": {"Yes": 0.9524285384930113, "yes": 0.04570998567624735}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8266930199727905, "res": {"Yes": 0.8266930199727905, "yes": 0.1594595490330535}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8007178669419115, "res": {"Yes": 0.8007178669419115, "yes": 0.19264532018569727}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7269991650036246, "res": {"Yes": 0.7269991650036246, "yes": 0.26818846982954225}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9824463247782693, "res": {"Yes": 0.9824463247782693, "yes": 0.010470327235537468}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7168509213606283, "res": {"Yes": 0.7168509213606283, "yes": 0.2764125452759898}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.676701668570993, "res": {"Yes": 0.676701668570993, "yes": 0.3179607918095324}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829298131945858, "res": {"Yes": 0.9829298131945858, "yes": 0.014983114679139159}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9100379154586878, "res": {"Yes": 0.9100379154586878, "yes": 0.08324193763804756}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9202505491345107, "res": {"Yes": 0.9202505491345107, "yes": 0.07673564084390794}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8894480332599923, "res": {"Yes": 0.8894480332599923, "yes": 0.10297317104225515}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8738702410207789, "res": {"Yes": 0.8738702410207789, "yes": 0.11728528250912378}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8767356301518131, "res": {"Yes": 0.8767356301518131, "yes": 0.10694585506769774}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8429039127329281, "res": {"Yes": 0.8429039127329281, "yes": 0.1411894038915301}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9306644551893642, "res": {"Yes": 0.9306644551893642, "yes": 0.059773886141374466}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.867036139221959, "res": {"Yes": 0.867036139221959, "yes": 0.11116047511084215}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8131757728786636, "res": {"Yes": 0.8131757728786636, "yes": 0.18217955724219395}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8673102613400172, "res": {"Yes": 0.8673102613400172, "yes": 0.12002376669726647}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8782699785143764, "res": {"Yes": 0.8782699785143764, "yes": 0.11527231422631004}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.823124412687839, "res": {"Yes": 0.823124412687839, "yes": 0.1720364338312597}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8676515196431267, "res": {"Yes": 0.8676515196431267, "yes": 0.12747370129653157}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7271828063729037, "res": {"Yes": 0.7271828063729037, "yes": 0.25635345581417823}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7031477771114274, "res": {"Yes": 0.7031477771114274, "yes": 0.2907280455937884}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5427487355154056, "res": {"Yes": 0.5427487355154056, "yes": 0.44626904452151667}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7050437650581267, "res": {"Yes": 0.7050437650581267, "yes": 0.28861602716559825}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7436197079922336, "res": {"Yes": 0.7436197079922336, "yes": 0.25028625403772925}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9179900988373868, "res": {"Yes": 0.9179900988373868, "yes": 0.07239367827836143}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9218054512608417, "res": {"Yes": 0.9218054512608417, "yes": 0.06740628608882432}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8631674191638697, "res": {"Yes": 0.8631674191638697, "yes": 0.12781061257415846}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8772660226954513, "res": {"Yes": 0.8772660226954513, "yes": 0.11243031179360964}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9302776304683004, "res": {"Yes": 0.9302776304683004, "yes": 0.06010939060376547}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7500361752112382, "res": {"Yes": 0.7500361752112382, "yes": 0.2411241338217801}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8250225579925474, "res": {"Yes": 0.8250225579925474, "yes": 0.16694252290975445}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8092931553583975, "res": {"Yes": 0.8092931553583975, "yes": 0.17944877281536617}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8289891184904484, "res": {"Yes": 0.8289891184904484, "yes": 0.15942141087043088}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.782138385920823, "res": {"Yes": 0.782138385920823, "yes": 0.20507391579047937}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7748363912343098, "res": {"Yes": 0.7748363912343098, "yes": 0.1761203342927354}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9153876428814858, "res": {"Yes": 0.9153876428814858, "yes": 0.07213220955365454}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7064851119817318, "res": {"Yes": 0.7064851119817318, "yes": 0.27803101688120857}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7625877234274617, "res": {"Yes": 0.7625877234274617, "yes": 0.20901281773965402}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8063239936608543, "res": {"Yes": 0.8063239936608543, "yes": 0.19030416850626505}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7287699252574709, "res": {"Yes": 0.7287699252574709, "yes": 0.2576969749609028}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.391077159339989, "res": {"yes": 0.5984955435760807, "Yes": 0.391077159339989}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4139555698991941, "res": {"yes": 0.5616688711361518, "Yes": 0.4139555698991941}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6576090323559645, "res": {"Yes": 0.6576090323559645, "yes": 0.3209598312420258}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7673304008736367, "res": {"Yes": 0.7673304008736367, "yes": 0.2203211333293898}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9123137809709801, "res": {"Yes": 0.9123137809709801, "yes": 0.0850762811196298}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855870780830688, "res": {"Yes": 0.9855870780830688, "yes": 0.011270033257035265}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7402030415103151, "res": {"Yes": 0.7402030415103151, "yes": 0.2525969357445937}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843620929282562, "res": {"Yes": 0.9843620929282562, "yes": 0.012403509775623217}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.783198611987433, "res": {"Yes": 0.783198611987433, "yes": 0.21258414341094842}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9115357815103012, "res": {"Yes": 0.9115357815103012, "yes": 0.08041363722131895}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9534286655568902, "res": {"Yes": 0.9534286655568902, "yes": 0.04586314158879271}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7511242310947964, "res": {"Yes": 0.7511242310947964, "yes": 0.2345144435567585}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8584440264611901, "res": {"Yes": 0.8584440264611901, "yes": 0.1280017939405736}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8381722841105317, "res": {"Yes": 0.8381722841105317, " Yes": 0.13634669623733084}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7613987855936581, "res": {"Yes": 0.7613987855936581, "yes": 0.2313657701930696}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7699686802204777, "res": {"Yes": 0.7699686802204777, "yes": 0.22259764938772927}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8055075929728815, "res": {"Yes": 0.8055075929728815, "yes": 0.18973132244283814}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785508952084401, "res": {"Yes": 0.9785508952084401, "yes": 0.017289716088202182}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9118842735469808, "res": {"Yes": 0.9118842735469808, "yes": 0.08613245441104476}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8514158494592187, "res": {"Yes": 0.8514158494592187, "yes": 0.14487195437157055}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9465220142679635, "res": {"Yes": 0.9465220142679635, "yes": 0.050461656920118086}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8292655495410487, "res": {"Yes": 0.8292655495410487, "yes": 0.16728259685175156}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8404750762947227, "res": {"Yes": 0.8404750762947227, "yes": 0.15629167592666615}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7629269444910023, "res": {"Yes": 0.7629269444910023, "yes": 0.23384948431694363}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.750344668512169, "res": {"Yes": 0.750344668512169, "yes": 0.2387526977998268}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6891394594222602, "res": {"Yes": 0.6891394594222602, "yes": 0.30339681797802814}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.96139515601972, "res": {"Yes": 0.96139515601972, "yes": 0.03509116112580066}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8892329782759785, "res": {"Yes": 0.8892329782759785, "yes": 0.10676184210052722}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6482635580074797, "res": {"Yes": 0.6482635580074797, "yes": 0.35054573359729974}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8698786498440768, "res": {"Yes": 0.8698786498440768, "yes": 0.1243362860371932}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9604000140554915, "res": {"Yes": 0.9604000140554915, "yes": 0.031402565047295876}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9678973410149968, "res": {"Yes": 0.9678973410149968, "yes": 0.02719705999340291}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9606309719256992, "res": {"Yes": 0.9606309719256992, "yes": 0.02537143358176307}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9768551401440011, "res": {"Yes": 0.9768551401440011, "yes": 0.019878111047491414}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6828565300286057, "res": {"Yes": 0.6828565300286057, "yes": 0.2919295129094006}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5471349651361129, "res": {"Yes": 0.5471349651361129, "yes": 0.4345904698305456}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6490844743255962, "res": {"Yes": 0.6490844743255962, "yes": 0.31152857855595606}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6102683930346374, "res": {"Yes": 0.6102683930346374, "yes": 0.36938233247007046}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8349007278512612, "res": {"Yes": 0.8349007278512612, "yes": 0.16292066735458446}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6778639533176105, "res": {"Yes": 0.6778639533176105, "yes": 0.31327837710102013}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6666452990812138, "res": {"Yes": 0.6666452990812138, "yes": 0.3252717799861953}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7676640933214732, "res": {"Yes": 0.7676640933214732, "yes": 0.22515989155988797}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9536835987873079, "res": {"Yes": 0.9536835987873079, "yes": 0.04136552394647196}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7615585971260272, "res": {"Yes": 0.7615585971260272, "yes": 0.2287136944979381}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7170169489205862, "res": {"Yes": 0.7170169489205862, "yes": 0.26986449940775586}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8030430219058462, "res": {"Yes": 0.8030430219058462, "yes": 0.1901125336087846}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8229473118316812, "res": {"Yes": 0.8229473118316812, "yes": 0.1694686881338855}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8617825778552896, "res": {"Yes": 0.8617825778552896, "yes": 0.13117158548171418}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.769134163349446, "res": {"Yes": 0.769134163349446, "yes": 0.21581776671624125}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.509047574600876, "res": {"Yes": 0.509047574600876, "yes": 0.4880750470569095}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9838547014115412, "res": {"Yes": 0.9838547014115412, "yes": 0.012007938575046027}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.923287664982339, "res": {"Yes": 0.923287664982339, "yes": 0.07089878375241565}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9672384495578306, "res": {"Yes": 0.9672384495578306, "yes": 0.026796060460850767}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8060210662745579, "res": {"Yes": 0.8060210662745579, "yes": 0.18994116740661032}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8711402877014247, "res": {"Yes": 0.8711402877014247, "yes": 0.12469506525677311}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8516308250397989, "res": {"Yes": 0.8516308250397989, "yes": 0.14419128396915848}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8489603633827716, "res": {"Yes": 0.8489603633827716, "yes": 0.14614993970215898}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49036218885195554, "res": {"yes": 0.5051353529377095, "Yes": 0.49036218885195554}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7727677177918647, "res": {"Yes": 0.7727677177918647, "yes": 0.2230285698073746}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7166827463591713, "res": {"Yes": 0.7166827463591713, "yes": 0.26479887590501233}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6518853344043325, "res": {"Yes": 0.6518853344043325, "yes": 0.33642760304956876}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8037841065932347, "res": {"Yes": 0.8037841065932347, "yes": 0.19154664940519067}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9639726443543601, "res": {"Yes": 0.9639726443543601, "yes": 0.031013648413028012}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8801622618864735, "res": {"Yes": 0.8801622618864735, "yes": 0.11328920426479425}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.41888305626879063, "res": {"yes": 0.49465432188124425, "Yes": 0.41888305626879063}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4186649169041101, "res": {"Yes": 0.4186649169041101, "yes": 0.3919023646441643}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3755882708141423, "res": {"yes": 0.5027024171299806, "Yes": 0.3755882708141423}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3232653422390562, "res": {"yes": 0.6126156120547614, "Yes": 0.3232653422390562}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.46011823036086824, "res": {"Yes": 0.46011823036086824, "yes": 0.4084979267378907}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.924498851914971, "res": {"Yes": 0.924498851914971, "yes": 0.0553627552069215}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9353347185516525, "res": {"Yes": 0.9353347185516525, "yes": 0.049910279272217244}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9334406686259068, "res": {"Yes": 0.9334406686259068, "yes": 0.05635715907399878}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9463442362702357, "res": {"Yes": 0.9463442362702357, "yes": 0.04311039188129872}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9117696933683902, "res": {"Yes": 0.9117696933683902, "yes": 0.07731910499849998}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8962711670474209, "res": {"Yes": 0.8962711670474209, "yes": 0.0951013798568788}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9419127177641134, "res": {"Yes": 0.9419127177641134, "yes": 0.05345523523013637}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8994098886385506, "res": {"Yes": 0.8994098886385506, "yes": 0.09327501491233395}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9141431742572319, "res": {"Yes": 0.9141431742572319, "yes": 0.07947292351905146}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9639532561675422, "res": {"Yes": 0.9639532561675422, "yes": 0.03479767864604651}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8115677484931605, "res": {"Yes": 0.8115677484931605, "yes": 0.17552756199543318}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8776038351389117, "res": {"Yes": 0.8776038351389117, "yes": 0.11103557872213657}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.89967814165732, "res": {"Yes": 0.89967814165732, "yes": 0.0957389559626943}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8369677748587201, "res": {"Yes": 0.8369677748587201, "yes": 0.15326436083623593}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8820530300584014, "res": {"Yes": 0.8820530300584014, "yes": 0.1077232328376031}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8968808207682678, "res": {"Yes": 0.8968808207682678, "yes": 0.09989099874897266}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8802582400121419, "res": {"Yes": 0.8802582400121419, "yes": 0.11053802495270838}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903507358093913, "res": {"Yes": 0.9903507358093913, "yes": 0.006892551108534114}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9211010657022146, "res": {"Yes": 0.9211010657022146, "yes": 0.0708030854096263}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8773371279880938, "res": {"Yes": 0.8773371279880938, "yes": 0.11543234248577251}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9791512421693673, "res": {"Yes": 0.9791512421693673, "yes": 0.018176931297453146}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9604316847285381, "res": {"Yes": 0.9604316847285381, "yes": 0.03309356918221599}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9601758713272296, "res": {"Yes": 0.9601758713272296, "yes": 0.03626714659492563}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9529529632993425, "res": {"Yes": 0.9529529632993425, "yes": 0.042660152123226096}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.972962258836616, "res": {"Yes": 0.972962258836616, "yes": 0.02455028784409391}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.941144284612881, "res": {"Yes": 0.941144284612881, "yes": 0.05417842567805187}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8121210285409642, "res": {"Yes": 0.8121210285409642, "yes": 0.18336429900916448}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9702531074567872, "res": {"Yes": 0.9702531074567872, "yes": 0.027906813737209307}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9434290903120054, "res": {"Yes": 0.9434290903120054, "yes": 0.04808541357209724}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7294580338157245, "res": {"Yes": 0.7294580338157245, "yes": 0.26245941983648474}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7713978965555265, "res": {"Yes": 0.7713978965555265, "yes": 0.22203190305052153}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9570951032786288, "res": {"Yes": 0.9570951032786288, "yes": 0.03317624404481618}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7615200175447259, "res": {"Yes": 0.7615200175447259, "yes": 0.22714016161440065}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7644877326355776, "res": {"Yes": 0.7644877326355776, "yes": 0.22800359103459414}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9683358629760052, "res": {"Yes": 0.9683358629760052, "yes": 0.021557650798210433}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9441699521345336, "res": {"Yes": 0.9441699521345336, "yes": 0.05183617574553724}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9748984599974201, "res": {"Yes": 0.9748984599974201, "yes": 0.021506746551032282}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9689526045506724, "res": {"Yes": 0.9689526045506724, "yes": 0.029055920867929668}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38344378615539154, "res": {"yes": 0.611120232072273, "Yes": 0.38344378615539154}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9422143278769398, "res": {"Yes": 0.9422143278769398, "yes": 0.05375309673282571}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8898606202328239, "res": {"Yes": 0.8898606202328239, "yes": 0.10186149302650763}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9120398314737105, "res": {"Yes": 0.9120398314737105, "yes": 0.07685930779007753}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8912485982909012, "res": {"Yes": 0.8912485982909012, "yes": 0.10240794310503207}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9603693460915053, "res": {"Yes": 0.9603693460915053, "yes": 0.034848597029120555}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9726097328690089, "res": {"Yes": 0.9726097328690089, "yes": 0.02236092417344377}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7444293107419723, "res": {"Yes": 0.7444293107419723, "yes": 0.24607623319979813}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8825366158387532, "res": {"Yes": 0.8825366158387532, "yes": 0.1119414849520376}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6226336592726225, "res": {"Yes": 0.6226336592726225, "yes": 0.37466750487060096}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7999511385435798, "res": {"Yes": 0.7999511385435798, "yes": 0.19625420417525669}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8094137166460015, "res": {"Yes": 0.8094137166460015, "yes": 0.1839892402472707}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.912928769046298, "res": {"Yes": 0.912928769046298, "yes": 0.07395008798763536}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9437975146851526, "res": {"Yes": 0.9437975146851526, "yes": 0.04647447694422929}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8972967066262254, "res": {"Yes": 0.8972967066262254, "yes": 0.0885818761874152}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9123888036177032, "res": {"Yes": 0.9123888036177032, "yes": 0.07124414775348367}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9078858446823088, "res": {"Yes": 0.9078858446823088, "yes": 0.08488506618651898}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8428891704724158, "res": {"Yes": 0.8428891704724158, "yes": 0.15042303815124877}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9094025623210715, "res": {"Yes": 0.9094025623210715, "yes": 0.05882869105328462}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7375280924765483, "res": {"Yes": 0.7375280924765483, "yes": 0.23403404096391892}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9328337338163368, "res": {"Yes": 0.9328337338163368, "yes": 0.06158804512027617}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6256480415127723, "res": {"Yes": 0.6256480415127723, "yes": 0.33937088961464684}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.599093439826369, "res": {"Yes": 0.599093439826369, "yes": 0.36672211410868777}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7681775635642808, "res": {"Yes": 0.7681775635642808, "yes": 0.21661389412595297}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7252558643811569, "res": {"Yes": 0.7252558643811569, "yes": 0.26614572017297233}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7691519536283881, "res": {"Yes": 0.7691519536283881, "yes": 0.21548424094020255}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.35527590293405625, "res": {"yes": 0.6214549665181244, "Yes": 0.35527590293405625}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6671911449387293, "res": {"Yes": 0.6671911449387293, "yes": 0.32590993786539424}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7004231686139265, "res": {"Yes": 0.7004231686139265, "yes": 0.29606856883960114}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6959641281432613, "res": {"Yes": 0.6959641281432613, "yes": 0.2960310889313143}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5656631609108592, "res": {"Yes": 0.5656631609108592, "yes": 0.4241686896180028}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5639754820339528, "res": {"Yes": 0.5639754820339528, "yes": 0.4280461765861444}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8938440497685461, "res": {"Yes": 0.8938440497685461, "yes": 0.10175629390624899}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.864928685463382, "res": {"Yes": 0.864928685463382, "yes": 0.12125951233431351}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7996885017116044, "res": {"Yes": 0.7996885017116044, "yes": 0.19573636533464508}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8408509703527088, "res": {"Yes": 0.8408509703527088, "yes": 0.15242370548866715}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9336437756722098, "res": {"Yes": 0.9336437756722098, "yes": 0.060931755706290225}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.43520830519250947, "res": {"yes": 0.5601946311563906, "Yes": 0.43520830519250947}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5026049023202998, "res": {"Yes": 0.5026049023202998, "yes": 0.4906985455115529}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6171207680200299, "res": {"Yes": 0.6171207680200299, "yes": 0.37222342505999556}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7082605390307094, "res": {"Yes": 0.7082605390307094, "yes": 0.2875210511597707}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6982057422376937, "res": {"Yes": 0.6982057422376937, "yes": 0.2950651498725023}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.96204623128725, "res": {"Yes": 0.96204623128725, "yes": 0.034972088747280264}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9752771431411025, "res": {"Yes": 0.9752771431411025, "yes": 0.022189845603352257}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9689471202944511, "res": {"Yes": 0.9689471202944511, "yes": 0.02776767235126926}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489032480925594, "res": {"Yes": 0.9489032480925594, "yes": 0.0475078651283118}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9797157141530445, "res": {"Yes": 0.9797157141530445, "yes": 0.014222728434235115}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6723323111830778, "res": {"Yes": 0.6723323111830778, "yes": 0.32196249995074194}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7225238855190982, "res": {"Yes": 0.7225238855190982, "yes": 0.2716274012857699}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.669955337008784, "res": {"Yes": 0.669955337008784, "yes": 0.30697109159939984}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8239789380650587, "res": {"Yes": 0.8239789380650587, "yes": 0.16486950446896476}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824877534483685, "res": {"Yes": 0.9824877534483685, "yes": 0.011775918946019334}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7951735549820823, "res": {"Yes": 0.7951735549820823, "yes": 0.18752115748830345}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9226223206011739, "res": {"Yes": 0.9226223206011739, "yes": 0.06706105539483313}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8201721678252618, "res": {"Yes": 0.8201721678252618, "yes": 0.16753313942044004}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8814940473777216, "res": {"Yes": 0.8814940473777216, "yes": 0.10274703319882772}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8577550583755942, "res": {"Yes": 0.8577550583755942, "yes": 0.13271259882339737}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9073326840810689, "res": {"Yes": 0.9073326840810689, "yes": 0.08771332085282808}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8699841203250753, "res": {"Yes": 0.8699841203250753, "yes": 0.12297963578995821}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9205149290745005, "res": {"Yes": 0.9205149290745005, "yes": 0.07714031058185178}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8952153778637342, "res": {"Yes": 0.8952153778637342, "yes": 0.09920000641737817}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9504662103766106, "res": {"Yes": 0.9504662103766106, "yes": 0.04477316565218425}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8136117031081693, "res": {"Yes": 0.8136117031081693, "yes": 0.17297354763833375}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7254451591031319, "res": {"Yes": 0.7254451591031319, "yes": 0.26908316058508064}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8308436405612768, "res": {"Yes": 0.8308436405612768, "yes": 0.1612500360081377}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8564742293458906, "res": {"Yes": 0.8564742293458906, "yes": 0.13735000912565162}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8174685482823079, "res": {"Yes": 0.8174685482823079, "yes": 0.17653355629991965}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8583734910135243, "res": {"Yes": 0.8583734910135243, "yes": 0.13370192460853803}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.774853941477327, "res": {"Yes": 0.774853941477327, "yes": 0.21366886973724386}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9138559626911963, "res": {"Yes": 0.9138559626911963, "yes": 0.08161880419450533}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7436778390168289, "res": {"Yes": 0.7436778390168289, "yes": 0.25028723015602344}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8798023491285382, "res": {"Yes": 0.8798023491285382, "yes": 0.11350107167894655}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8480214573767874, "res": {"Yes": 0.8480214573767874, "yes": 0.15004284385036906}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8123114038990125, "res": {"Yes": 0.8123114038990125, "yes": 0.18305064789122558}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7629225195275566, "res": {"Yes": 0.7629225195275566, "yes": 0.22953076641807793}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7093865049586324, "res": {"Yes": 0.7093865049586324, "yes": 0.281700451875279}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6319132543696095, "res": {"Yes": 0.6319132543696095, "yes": 0.36079332194015856}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5069829019115284, "res": {"Yes": 0.5069829019115284, "yes": 0.39523085569693367}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4116405754789865, "res": {"yes": 0.5201411562067245, "Yes": 0.4116405754789865}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5542864600333745, "res": {"Yes": 0.5542864600333745, "yes": 0.3843148386518463}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5927829009307666, "res": {"Yes": 0.5927829009307666, "yes": 0.36024493639851446}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7112606642364075, "res": {"Yes": 0.7112606642364075, "yes": 0.2441028075526103}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8258514700267272, "res": {"Yes": 0.8258514700267272, "yes": 0.16320521211221542}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8765462844721794, "res": {"Yes": 0.8765462844721794, "yes": 0.1031708940616666}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8789946919960546, "res": {"Yes": 0.8789946919960546, "yes": 0.1100562478269598}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9305409790986239, "res": {"Yes": 0.9305409790986239, "yes": 0.06376030251004894}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8488024714395883, "res": {"Yes": 0.8488024714395883, "yes": 0.13762407570524904}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.924063687361326, "res": {"Yes": 0.924063687361326, "yes": 0.06878275634343646}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9263780002654036, "res": {"Yes": 0.9263780002654036, "yes": 0.06276359945933759}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9269428275700116, "res": {"Yes": 0.9269428275700116, "yes": 0.06547973326640147}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9156356229384178, "res": {"Yes": 0.9156356229384178, "yes": 0.07757611465585282}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8824814590239317, "res": {"Yes": 0.8824814590239317, "yes": 0.10578498376696042}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7401908874761585, "res": {"Yes": 0.7401908874761585, "yes": 0.24925079015869467}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5904861135783389, "res": {"Yes": 0.5904861135783389, "yes": 0.3963954321213459}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7934904772760779, "res": {"Yes": 0.7934904772760779, "yes": 0.1956407127786587}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8187818107197046, "res": {"Yes": 0.8187818107197046, "yes": 0.17139324231430275}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8531522497670779, "res": {"Yes": 0.8531522497670779, "yes": 0.14113797795949468}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8634236280074791, "res": {"Yes": 0.8634236280074791, "yes": 0.11984756923914429}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9236541536425986, "res": {"Yes": 0.9236541536425986, "yes": 0.06461407581858622}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8346034309013338, "res": {"Yes": 0.8346034309013338, "yes": 0.15651224957817678}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9505886753816543, "res": {"Yes": 0.9505886753816543, "yes": 0.03873679327439394}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820421686017398, "res": {"Yes": 0.9820421686017398, "yes": 0.011250732965107917}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8439547749536527, "res": {"Yes": 0.8439547749536527, "yes": 0.15039575388688575}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9864448231645848, "res": {"Yes": 0.9864448231645848, "yes": 0.00865844615790916}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7784668270375364, "res": {"Yes": 0.7784668270375364, "yes": 0.21297020621101406}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7879981790320193, "res": {"Yes": 0.7879981790320193, "yes": 0.19473208270080175}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8681380180430628, "res": {"Yes": 0.8681380180430628, "yes": 0.1154362326212627}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6383964842429108, "res": {"Yes": 0.6383964842429108, "yes": 0.30898081233984004}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6952506019950313, "res": {"Yes": 0.6952506019950313, "yes": 0.27154342707637413}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6096211380085474, "res": {"Yes": 0.6096211380085474, "yes": 0.34815921133305966}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7544604658199484, "res": {"Yes": 0.7544604658199484, "yes": 0.21567410150402602}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6946729984272714, "res": {"Yes": 0.6946729984272714, "yes": 0.3013465342095768}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9287108660394693, "res": {"Yes": 0.9287108660394693, "yes": 0.06700032559339959}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8985821920326419, "res": {"Yes": 0.8985821920326419, "yes": 0.0937058648419657}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9014478915054421, "res": {"Yes": 0.9014478915054421, "yes": 0.09327280432067611}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8786078524702767, "res": {"Yes": 0.8786078524702767, "yes": 0.10947331040814792}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8706701121225681, "res": {"Yes": 0.8706701121225681, "yes": 0.11816367964937818}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7118427134854406, "res": {"Yes": 0.7118427134854406, "yes": 0.2678621222693913}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7707053534535376, "res": {"Yes": 0.7707053534535376, "yes": 0.21336520585727245}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8574990995757739, "res": {"Yes": 0.8574990995757739, "yes": 0.12843725266706338}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9217501455147328, "res": {"Yes": 0.9217501455147328, "yes": 0.06924442794777586}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8646622852162298, "res": {"Yes": 0.8646622852162298, "yes": 0.12432786875555671}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8493591202986149, "res": {"Yes": 0.8493591202986149, "yes": 0.13806661402030523}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8632141350487154, "res": {"Yes": 0.8632141350487154, "yes": 0.12391406225194813}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7686853046158829, "res": {"Yes": 0.7686853046158829, "yes": 0.21515230635528493}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8678665069375696, "res": {"Yes": 0.8678665069375696, "yes": 0.11666947990337641}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.942355629162914, "res": {"Yes": 0.942355629162914, "yes": 0.05157862172493749}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.556252305940093, "res": {"Yes": 0.556252305940093, "yes": 0.4335445022562285}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.47635731718725915, "res": {"yes": 0.5154522800670586, "Yes": 0.47635731718725915}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4440599735982834, "res": {"yes": 0.5486698963548987, "Yes": 0.4440599735982834}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6570728525257624, "res": {"Yes": 0.6570728525257624, "yes": 0.33774989939841765}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5643426931711085, "res": {"Yes": 0.5643426931711085, "yes": 0.4272975371918284}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8602525242391903, "res": {"Yes": 0.8602525242391903, "yes": 0.13219678117602046}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7427180361737216, "res": {"Yes": 0.7427180361737216, "yes": 0.23545084816131406}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8374599310079942, "res": {"Yes": 0.8374599310079942, "yes": 0.15137773644709027}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8834444889849519, "res": {"Yes": 0.8834444889849519, "yes": 0.11064787692184228}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8336058668833031, "res": {"Yes": 0.8336058668833031, "yes": 0.15745631882124786}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5772058598117181, "res": {"Yes": 0.5772058598117181, "yes": 0.4170411420748982}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.31042753080576674, "res": {"yes": 0.6787800745407094, "Yes": 0.31042753080576674}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32079037710345043, "res": {"yes": 0.6371312185754581, "Yes": 0.32079037710345043}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44779543909849556, "res": {"yes": 0.5465484610497917, "Yes": 0.44779543909849556}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.35014255759904056, "res": {"yes": 0.6408120556298967, "Yes": 0.35014255759904056}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6468491012790238, "res": {"Yes": 0.6468491012790238, "yes": 0.347219095204308}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7005799268496238, "res": {"Yes": 0.7005799268496238, "yes": 0.26958986321547146}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5071173208952742, "res": {"Yes": 0.5071173208952742, "yes": 0.48585350527303106}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7914673668594794, "res": {"Yes": 0.7914673668594794, "yes": 0.20221265253426943}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5630633978435557, "res": {"Yes": 0.5630633978435557, "yes": 0.4334099205874794}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5438831788218168, "res": {"Yes": 0.5438831788218168, "yes": 0.44574604275554974}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.739209823038794, "res": {"Yes": 0.739209823038794, "yes": 0.24721847973083702}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8349098700642847, "res": {"Yes": 0.8349098700642847, "yes": 0.16164517883004537}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8855988731523928, "res": {"Yes": 0.8855988731523928, "yes": 0.10895332494232919}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.639351907647399, "res": {"Yes": 0.639351907647399, "yes": 0.34911859526175004}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8526294447518948, "res": {"Yes": 0.8526294447518948, "yes": 0.14253203464669145}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.910183064429869, "res": {"Yes": 0.910183064429869, "yes": 0.08588097096232612}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8926264006304682, "res": {"Yes": 0.8926264006304682, "yes": 0.10197969032342609}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9193653260437004, "res": {"Yes": 0.9193653260437004, "yes": 0.07331326630792888}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8284731587875095, "res": {"Yes": 0.8284731587875095, "yes": 0.16069969567284245}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8073474969399463, "res": {"Yes": 0.8073474969399463, "yes": 0.18439621391218666}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7133317780075317, "res": {"Yes": 0.7133317780075317, "yes": 0.2755644333158001}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7597430917653897, "res": {"Yes": 0.7597430917653897, "yes": 0.22587073779471742}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8078772260210966, "res": {"Yes": 0.8078772260210966, "yes": 0.1797370013780017}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.643432237835236, "res": {"Yes": 0.643432237835236, "yes": 0.3468822825371951}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6909073259882766, "res": {"Yes": 0.6909073259882766, "yes": 0.29970215924423627}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7543342174251225, "res": {"Yes": 0.7543342174251225, "yes": 0.23853340871361942}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6956513625669903, "res": {"Yes": 0.6956513625669903, "yes": 0.2981024975811297}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.786110052854484, "res": {"Yes": 0.786110052854484, "yes": 0.20925432516112613}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.795482197867213, "res": {"Yes": 0.795482197867213, "yes": 0.1939769386350446}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9586420952134238, "res": {"Yes": 0.9586420952134238, "yes": 0.03237929835672522}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9560758730164581, "res": {"Yes": 0.9560758730164581, "yes": 0.03887332485335903}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9083037408176095, "res": {"Yes": 0.9083037408176095, "yes": 0.08074095786930167}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9247243739323613, "res": {"Yes": 0.9247243739323613, "yes": 0.06467488687770603}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9120701655109026, "res": {"Yes": 0.9120701655109026, "yes": 0.07393445659133158}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7611412403380281, "res": {"Yes": 0.7611412403380281, "yes": 0.20996981430525533}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.776108786372554, "res": {"Yes": 0.776108786372554, "yes": 0.20272323592544145}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8089564785269253, "res": {"Yes": 0.8089564785269253, "yes": 0.14977582541411896}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7131574182339433, "res": {"Yes": 0.7131574182339433, "yes": 0.24930849839608715}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7328195115157122, "res": {"Yes": 0.7328195115157122, "yes": 0.13587675735621563}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8696565372949141, "res": {"Yes": 0.8696565372949141, "yes": 0.11892776345234801}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8603530162056656, "res": {"Yes": 0.8603530162056656, "yes": 0.12007194190055011}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8029489428966556, "res": {"Yes": 0.8029489428966556, "yes": 0.18268827388610756}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8982494547520936, "res": {"Yes": 0.8982494547520936, "yes": 0.09640959973599769}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9120147370429857, "res": {"Yes": 0.9120147370429857, "yes": 0.08001453286452653}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8575469665114659, "res": {"Yes": 0.8575469665114659, "yes": 0.1367688095712291}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8140209957561622, "res": {"Yes": 0.8140209957561622, "yes": 0.18146105886585848}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8518433078846672, "res": {"Yes": 0.8518433078846672, "yes": 0.14271888823700074}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8308573163601519, "res": {"Yes": 0.8308573163601519, "yes": 0.16450620321072587}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8367088905541787, "res": {"Yes": 0.8367088905541787, "yes": 0.1592115114632057}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9691708953995488, "res": {"Yes": 0.9691708953995488, "yes": 0.022825156408873618}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8828103284669071, "res": {"Yes": 0.8828103284669071, "yes": 0.11003407372713829}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8624175662556549, "res": {"Yes": 0.8624175662556549, "yes": 0.12735141073465575}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8357926357721668, "res": {"Yes": 0.8357926357721668, "yes": 0.15703075517247653}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8334657245103028, "res": {"Yes": 0.8334657245103028, "yes": 0.15386805954425592}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7458281479460311, "res": {"Yes": 0.7458281479460311, "yes": 0.24513428905688758}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8045417405940278, "res": {"Yes": 0.8045417405940278, "yes": 0.19087429671154327}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8423617964782881, "res": {"Yes": 0.8423617964782881, "yes": 0.15073936396398327}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8741129047794358, "res": {"Yes": 0.8741129047794358, "yes": 0.1231213284044367}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9190258910084147, "res": {"Yes": 0.9190258910084147, "yes": 0.07752384597110022}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8162492885498689, "res": {"Yes": 0.8162492885498689, "yes": 0.1758857047021911}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6835963138614817, "res": {"Yes": 0.6835963138614817, "yes": 0.3024540239513838}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44552496298710953, "res": {"yes": 0.541704299120674, "Yes": 0.44552496298710953}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6850465752264632, "res": {"Yes": 0.6850465752264632, "yes": 0.2960730691160495}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9105929028854305, "res": {"Yes": 0.9105929028854305, "yes": 0.07967370279628849}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.96603912839989, "res": {"Yes": 0.96603912839989, "yes": 0.02203473515605582}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.957282636678624, "res": {"Yes": 0.957282636678624, "yes": 0.031975461451093166}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7084387810547255, "res": {"Yes": 0.7084387810547255, "yes": 0.28940213137655624}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4938256101547805, "res": {"Yes": 0.4938256101547805, "yes": 0.49358406013195666}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7685435336866724, "res": {"Yes": 0.7685435336866724, "yes": 0.22405386215117146}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8478354642696515, "res": {"Yes": 0.8478354642696515, "yes": 0.14514828557256895}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8431062929669974, "res": {"Yes": 0.8431062929669974, "yes": 0.15206821133729231}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5254381148077488, "res": {"Yes": 0.5254381148077488, "yes": 0.4644330567776241}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5157163881364271, "res": {"Yes": 0.5157163881364271, "yes": 0.4769005501893357}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6616831916863439, "res": {"Yes": 0.6616831916863439, "yes": 0.3335523348003382}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8914996003218664, "res": {"Yes": 0.8914996003218664, "yes": 0.09978204716228402}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9064681042592736, "res": {"Yes": 0.9064681042592736, "yes": 0.08664791720343058}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8664089094929059, "res": {"Yes": 0.8664089094929059, "yes": 0.12895125683725928}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8911625399163932, "res": {"Yes": 0.8911625399163932, "yes": 0.1027241539822297}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.864007897149953, "res": {"Yes": 0.864007897149953, "yes": 0.12851629157477934}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9342191932559409, "res": {"Yes": 0.9342191932559409, "yes": 0.06028899476190925}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8825256636273093, "res": {"Yes": 0.8825256636273093, "yes": 0.11312272360423561}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8541805591202017, "res": {"Yes": 0.8541805591202017, "yes": 0.14053358213701242}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.85264175680997, "res": {"Yes": 0.85264175680997, "yes": 0.1362873029215536}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8743710247176527, "res": {"Yes": 0.8743710247176527, "yes": 0.11771094245568539}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9374270851521267, "res": {"Yes": 0.9374270851521267, "yes": 0.058592270138454965}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9773330221931635, "res": {"Yes": 0.9773330221931635, "yes": 0.018615390391497122}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9327486819140627, "res": {"Yes": 0.9327486819140627, "yes": 0.06314342833318497}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9736261622577723, "res": {"Yes": 0.9736261622577723, " Yes": 0.016198202494979882}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743532192304301, "res": {"Yes": 0.9743532192304301, " Yes": 0.011788279545204216}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8684916131973934, "res": {"Yes": 0.8684916131973934, "yes": 0.1241908231368044}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9805472478503358, "res": {"Yes": 0.9805472478503358, "yes": 0.014591837236634911}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897084343380118, "res": {"Yes": 0.9897084343380118, "yes": 0.005828949986920803}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9798168720834202, "res": {"Yes": 0.9798168720834202, "yes": 0.01602389873855272}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9584560072939431, "res": {"Yes": 0.9584560072939431, "yes": 0.03614959260543727}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9460516907862024, "res": {"Yes": 0.9460516907862024, "yes": 0.04950913900921846}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8985167327043373, "res": {"Yes": 0.8985167327043373, "yes": 0.09326337491682371}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9073767542997713, "res": {"Yes": 0.9073767542997713, "yes": 0.08699065325061199}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9307167972301685, "res": {"Yes": 0.9307167972301685, "yes": 0.06370498895029182}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9450626097305036, "res": {"Yes": 0.9450626097305036, "yes": 0.050630900867620704}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9352039510640114, "res": {"Yes": 0.9352039510640114, "yes": 0.06139148096215129}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9169521400164369, "res": {"Yes": 0.9169521400164369, "yes": 0.08108648320504387}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8493904367466955, "res": {"Yes": 0.8493904367466955, "yes": 0.14497042831730014}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8806683686165089, "res": {"Yes": 0.8806683686165089, "yes": 0.11532018522850919}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6196075098697782, "res": {"Yes": 0.6196075098697782, "yes": 0.3741698426194729}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5973066176690269, "res": {"Yes": 0.5973066176690269, "yes": 0.3866156623368325}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7215223818314136, "res": {"Yes": 0.7215223818314136, "yes": 0.26374314105728985}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8189450347652193, "res": {"Yes": 0.8189450347652193, "yes": 0.17078237394434664}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8165033040324104, "res": {"Yes": 0.8165033040324104, "yes": 0.16509107296340791}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8530247983418212, "res": {"Yes": 0.8530247983418212, "yes": 0.13304671125199682}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.870132195522615, "res": {"Yes": 0.870132195522615, "yes": 0.11683471912223468}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8811437583711965, "res": {"Yes": 0.8811437583711965, "yes": 0.10615630452412296}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.710870594973757, "res": {"Yes": 0.710870594973757, "yes": 0.2755381457227999}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8435318064508813, "res": {"Yes": 0.8435318064508813, "yes": 0.13391119802778106}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8313687583738171, "res": {"Yes": 0.8313687583738171, "yes": 0.15669566411146751}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6739740097601855, "res": {"Yes": 0.6739740097601855, "yes": 0.2634871444122979}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.739217688273154, "res": {"Yes": 0.739217688273154, "yes": 0.2065840143849504}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.629625097539197, "res": {"Yes": 0.629625097539197, "yes": 0.3272688089698838}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7833637746592452, "res": {"Yes": 0.7833637746592452, "yes": 0.16735082860625689}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5807846470288018, "res": {"Yes": 0.5807846470288018, "yes": 0.3681692603326818}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8579069888277872, "res": {"Yes": 0.8579069888277872, "yes": 0.1361931336864111}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9220194409673107, "res": {"Yes": 0.9220194409673107, "yes": 0.06689828148470099}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9443952938500498, "res": {"Yes": 0.9443952938500498, "yes": 0.05196135550753981}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9075032786610642, "res": {"Yes": 0.9075032786610642, "yes": 0.09097029084503364}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8947920679607996, "res": {"Yes": 0.8947920679607996, "yes": 0.09761651390787562}, "ground_truth": 0}]