[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9848971898937114, "res": {"Yes": 0.9848971898937114, "No": 0.015102673467576905}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991845089936755, "res": {"Yes": 0.9991845089936755, "No": 0.0008153911672918551}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961581588279562, "res": {"Yes": 0.9961581588279562, "No": 0.003841876730008229}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938398808154502, "res": {"Yes": 0.9938398808154502, "No": 0.006160136146354635}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904204223098619, "res": {"Yes": 0.9904204223098619, "No": 0.009579355163157135}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9042144620004331, "res": {"Yes": 0.9042144620004331, "No": 0.09578533356849782}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9583446488431494, "res": {"Yes": 0.9583446488431494, "No": 0.04165508204190534}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672569626789251, "res": {"Yes": 0.9672569626789251, "No": 0.0327428258144038}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8126081467040448, "res": {"Yes": 0.8126081467040448, "No": 0.18739155017042888}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9125980474841994, "res": {"Yes": 0.9125980474841994, "No": 0.08740181761732438}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8310057041088553, "res": {"Yes": 0.8310057041088553, "No": 0.16899399453268732}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9303985558134765, "res": {"Yes": 0.9303985558134765, "No": 0.06960124666143154}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6438786514875406, "res": {"Yes": 0.6438786514875406, "No": 0.3561211133313868}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9270830753591711, "res": {"Yes": 0.9270830753591711, "No": 0.07291675725930274}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.941614147680248, "res": {"Yes": 0.941614147680248, "No": 0.05838565808757737}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9457651978224154, "res": {"Yes": 0.9457651978224154, "No": 0.0542347191984697}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7557577731725067, "res": {"Yes": 0.7557577731725067, "No": 0.24424176599726344}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7528450117928462, "res": {"Yes": 0.7528450117928462, "No": 0.24715445843570233}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7437484545800644, "res": {"Yes": 0.7437484545800644, "No": 0.25625131931408557}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8074551235344416, "res": {"Yes": 0.8074551235344416, "No": 0.19254465078907504}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9711144815219706, "res": {"Yes": 0.9711144815219706, "No": 0.028885342282025193}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9337133067131402, "res": {"Yes": 0.9337133067131402, "No": 0.06628647497242111}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9793155085677002, "res": {"Yes": 0.9793155085677002, "No": 0.020684374889483104}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9597245968106042, "res": {"Yes": 0.9597245968106042, "No": 0.040275227551134545}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9297515826003733, "res": {"Yes": 0.9297515826003733, "No": 0.07024825576812586}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8213344363821423, "res": {"Yes": 0.8213344363821423, "No": 0.1786650955824351}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8284709550518381, "res": {"Yes": 0.8284709550518381, "No": 0.17152905668371984}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967799470268207, "res": {"Yes": 0.9967799470268207, "No": 0.0032199852505849225}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9827661729355105, "res": {"Yes": 0.9827661729355105, "No": 0.017233794953923874}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8933521025936794, "res": {"Yes": 0.8933521025936794, "No": 0.1066476893530398}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9995785909177606, "res": {"Yes": 0.9995785909177606, "No": 0.00042129523469393066}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966531156457525, "res": {"Yes": 0.9966531156457525, "No": 0.0033468709982382053}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999775151112301, "res": {"Yes": 0.999775151112301, "No": 0.00022475716618091432}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958989244729414, "res": {"Yes": 0.9958989244729414, "No": 0.00410105713787669}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995472650753987, "res": {"Yes": 0.9995472650753987, "No": 0.000452664918703929}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9644111752186341, "res": {"Yes": 0.9644111752186341, "No": 0.0355886184038}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7696581374955557, "res": {"Yes": 0.7696581374955557, "No": 0.23034165862474015}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9164686907890292, "res": {"Yes": 0.9164686907890292, "No": 0.08353105500846479}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9156657066410068, "res": {"Yes": 0.9156657066410068, "No": 0.08433411585944711}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9340475574583024, "res": {"Yes": 0.9340475574583024, "No": 0.06595222913572273}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8918188053964888, "res": {"Yes": 0.8918188053964888, "No": 0.10818080838227379}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.988035988786904, "res": {"Yes": 0.988035988786904, "No": 0.011963935851769476}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9698888690866742, "res": {"Yes": 0.9698888690866742, "No": 0.030110910434432675}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9605010726615704, "res": {"Yes": 0.9605010726615704, "No": 0.03949879554782863}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8862107410011735, "res": {"Yes": 0.8862107410011735, "No": 0.11378904810661977}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9940405662913658, "res": {"Yes": 0.9940405662913658, "No": 0.005959345615440133}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8950523328129705, "res": {"Yes": 0.8950523328129705, "No": 0.10494758961117136}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.922022686481455, "res": {"Yes": 0.922022686481455, "No": 0.0779771768185573}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8577139986237228, "res": {"Yes": 0.8577139986237228, "No": 0.1422858665959445}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.993544894487849, "res": {"Yes": 0.993544894487849, "No": 0.006455085559549223}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9630576153016666, "res": {"Yes": 0.9630576153016666, "No": 0.03694210579557498}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9743129188660339, "res": {"Yes": 0.9743129188660339, "No": 0.025686844636126274}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998235699211675, "res": {"Yes": 0.998235699211675, "No": 0.0017643092940573969}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9672005297177442, "res": {"Yes": 0.9672005297177442, "No": 0.032799414971036306}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8943459821800017, "res": {"Yes": 0.8943459821800017, "No": 0.10565375433101008}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9937456921165977, "res": {"Yes": 0.9937456921165977, "No": 0.006254168985028308}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.995454328282085, "res": {"Yes": 0.995454328282085, "No": 0.004545622941084481}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981450725084146, "res": {"Yes": 0.9981450725084146, "No": 0.0018548805101703146}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9880123631528094, "res": {"Yes": 0.9880123631528094, "No": 0.011987542421111372}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992985389389214, "res": {"Yes": 0.9992985389389214, "No": 0.0007014188119813692}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6538860488097364, "res": {"Yes": 0.6538860488097364, "No": 0.3461137502518998}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968262639815112, "res": {"Yes": 0.9968262639815112, "No": 0.0031737091380584686}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9409809631949124, "res": {"Yes": 0.9409809631949124, "No": 0.059018873287517426}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9874233129446017, "res": {"Yes": 0.9874233129446017, "No": 0.012576634390881773}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8514660674469336, "res": {"Yes": 0.8514660674469336, "No": 0.1485337906736921}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5510998887290345, "res": {"Yes": 0.5510998887290345, "No": 0.44889983534092925}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7379496733497165, "res": {"Yes": 0.7379496733497165, "No": 0.2620502499261039}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9893548727029071, "res": {"Yes": 0.9893548727029071, "No": 0.010644832668692785}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990345743235264, "res": {"Yes": 0.9990345743235264, "No": 0.0009653858422597647}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979706427355283, "res": {"Yes": 0.9979706427355283, "No": 0.0020291696682965083}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9083001984399279, "res": {"Yes": 0.9083001984399279, "No": 0.09169892709057481}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9540088603484387, "res": {"Yes": 0.9540088603484387, "No": 0.04599050117555047}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9615376376477388, "res": {"Yes": 0.9615376376477388, "No": 0.038461943772192586}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977783455355569, "res": {"Yes": 0.9977783455355569, "No": 0.0022216080465868654}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9141343061973131, "res": {"Yes": 0.9141343061973131, "No": 0.08586477533849796}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9946395547370258, "res": {"Yes": 0.9946395547370258, "No": 0.005360332887237616}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.997928262017781, "res": {"Yes": 0.997928262017781, "No": 0.0020715598598544474}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994623161222984, "res": {"Yes": 0.994623161222984, "No": 0.005376608467323915}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973425096958116, "res": {"Yes": 0.9973425096958116, "No": 0.0026573289893932817}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9849981529396292, "res": {"Yes": 0.9849981529396292, "No": 0.015001658759908294}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.08891014731057456, "res": {"No": 0.9110894661616985, "Yes": 0.08891014731057456}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.950481723062221, "res": {"Yes": 0.950481723062221, "No": 0.04951805145633629}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.976584243032397, "res": {"Yes": 0.976584243032397, "No": 0.02341566584652346}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9825259503672453, "res": {"Yes": 0.9825259503672453, "No": 0.01747402126772728}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965643121287343, "res": {"Yes": 0.9965643121287343, "No": 0.0034357106146063335}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9694880929644174, "res": {"Yes": 0.9694880929644174, "No": 0.030511753772939253}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9683226743313663, "res": {"Yes": 0.9683226743313663, "No": 0.031677218999368664}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991991519501124, "res": {"Yes": 0.9991991519501124, "No": 0.0008007551804488691}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952723193303636, "res": {"Yes": 0.9952723193303636, "No": 0.00472765817265419}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9599405092228297, "res": {"Yes": 0.9599405092228297, "No": 0.04005916871332842}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6354678625307245, "res": {"Yes": 0.6354678625307245, "No": 0.36453172291629665}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9600585085223138, "res": {"Yes": 0.9600585085223138, "No": 0.039941407949534706}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9161136911810552, "res": {"Yes": 0.9161136911810552, "No": 0.08388596181077251}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216102980532695, "res": {"Yes": 0.9216102980532695, "No": 0.07838955402343092}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.903252058467896, "res": {"Yes": 0.903252058467896, "No": 0.09674776965494788}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4279662464263779, "res": {"No": 0.5720336560986503, "Yes": 0.4279662464263779}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7455697781718952, "res": {"Yes": 0.7455697781718952, "No": 0.25443003306074286}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30337527756864535, "res": {"No": 0.6966245145169379, "Yes": 0.30337527756864535}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5780685030181179, "res": {"Yes": 0.5780685030181179, "No": 0.42193128920167994}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9637343142334565, "res": {"Yes": 0.9637343142334565, "No": 0.03626562340675581}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7601806854083663, "res": {"Yes": 0.7601806854083663, "No": 0.23981909756643635}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9870360711156383, "res": {"Yes": 0.9870360711156383, "No": 0.012963844351297955}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9664889088322102, "res": {"Yes": 0.9664889088322102, "No": 0.03351095763675833}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9774557975918007, "res": {"Yes": 0.9774557975918007, "No": 0.02254397122994748}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9727445118742546, "res": {"Yes": 0.9727445118742546, "No": 0.02725515503957888}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9388890541024931, "res": {"Yes": 0.9388890541024931, "No": 0.06111073088457308}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9665835655919897, "res": {"Yes": 0.9665835655919897, "No": 0.03341619888551503}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.969097670520566, "res": {"Yes": 0.969097670520566, "No": 0.030902094938936187}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9688847879933107, "res": {"Yes": 0.9688847879933107, "No": 0.03111518236433456}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.32466431738967005, "res": {"No": 0.6753356043761913, "Yes": 0.32466431738967005}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9911867275909941, "res": {"Yes": 0.9911867275909941, "No": 0.00881316458311862}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9820980483908981, "res": {"Yes": 0.9820980483908981, "No": 0.017901979243240777}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945167959421559, "res": {"Yes": 0.9945167959421559, "No": 0.00548315288220908}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9590303874500795, "res": {"Yes": 0.9590303874500795, "No": 0.04096933167918598}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967024939545579, "res": {"Yes": 0.9967024939545579, "No": 0.003297522005520247}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8390395702831367, "res": {"Yes": 0.8390395702831367, "No": 0.16096028845196844}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6710847607120507, "res": {"Yes": 0.6710847607120507, "No": 0.3289148093147837}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8818915056577173, "res": {"Yes": 0.8818915056577173, "No": 0.11810825025563937}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9908606570145996, "res": {"Yes": 0.9908606570145996, "No": 0.009139270597842855}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.906467709945734, "res": {"Yes": 0.906467709945734, "No": 0.09353201452093676}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8992733505941227, "res": {"Yes": 0.8992733505941227, "No": 0.10072646534836238}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8698957866222755, "res": {"Yes": 0.8698957866222755, "No": 0.13010394123932106}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.829245050350035, "res": {"Yes": 0.829245050350035, "No": 0.17075465821424782}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921294939109028, "res": {"Yes": 0.9921294939109028, "No": 0.00787041882541486}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8386697108895476, "res": {"Yes": 0.8386697108895476, "No": 0.16132998746531843}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9474193569937264, "res": {"Yes": 0.9474193569937264, "No": 0.05258054997287179}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.38760691444471096, "res": {"No": 0.6123928075258649, "Yes": 0.38760691444471096}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9623876181508239, "res": {"Yes": 0.9623876181508239, "No": 0.037612080134982906}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9163641771605133, "res": {"Yes": 0.9163641771605133, "No": 0.0836356846571586}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9750169817075672, "res": {"Yes": 0.9750169817075672, "No": 0.024982767294901665}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8280840335101958, "res": {"Yes": 0.8280840335101958, "No": 0.17191591293388572}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9893414551631081, "res": {"Yes": 0.9893414551631081, "No": 0.010658420947168852}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672835113451188, "res": {"Yes": 0.9672835113451188, "No": 0.032716380296573734}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8349220765358142, "res": {"Yes": 0.8349220765358142, "No": 0.16507771763583548}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6712944336286057, "res": {"Yes": 0.6712944336286057, "No": 0.32870522581668304}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9698989521037685, "res": {"Yes": 0.9698989521037685, "No": 0.030100776807565517}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8823623761863316, "res": {"Yes": 0.8823623761863316, "No": 0.11763699613581331}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967964155624365, "res": {"Yes": 0.9967964155624365, "No": 0.0032035843072510794}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9748599444699032, "res": {"Yes": 0.9748599444699032, "No": 0.025139820898753672}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9555246257517086, "res": {"Yes": 0.9555246257517086, "No": 0.04447517102583868}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9441840883528804, "res": {"Yes": 0.9441840883528804, "No": 0.0558156937019002}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6783158391886489, "res": {"Yes": 0.6783158391886489, "No": 0.3216837367830423}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9416875098129769, "res": {"Yes": 0.9416875098129769, "No": 0.05831231342214087}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940399772231008, "res": {"Yes": 0.9940399772231008, "No": 0.005959957075667651}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926959948472194, "res": {"Yes": 0.9926959948472194, "No": 0.007303858564445295}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7549755608440529, "res": {"Yes": 0.7549755608440529, "No": 0.24502429747191673}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9623583620119276, "res": {"Yes": 0.9623583620119276, "No": 0.037641504284991655}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.973487752638922, "res": {"Yes": 0.973487752638922, "No": 0.026512026997128464}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9291069713745822, "res": {"Yes": 0.9291069713745822, "No": 0.07089277887974177}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8849991450675866, "res": {"Yes": 0.8849991450675866, "No": 0.1150005923226974}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9959625395484268, "res": {"Yes": 0.9959625395484268, "No": 0.004037456760356237}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9888473164764571, "res": {"Yes": 0.9888473164764571, "No": 0.011152517743216198}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991920054035751, "res": {"Yes": 0.9991920054035751, "No": 0.0008079692961334743}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979166265414575, "res": {"Yes": 0.9979166265414575, "No": 0.002083160640286924}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970901224247378, "res": {"Yes": 0.9970901224247378, "No": 0.002909808521894232}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6648944451651754, "res": {"Yes": 0.6648944451651754, "No": 0.33510498774556546}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9310394023412014, "res": {"Yes": 0.9310394023412014, "No": 0.0689604932469012}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8674181527735938, "res": {"Yes": 0.8674181527735938, "No": 0.13258163633484957}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8452489750455197, "res": {"Yes": 0.8452489750455197, "No": 0.15475083061007663}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9546397354890888, "res": {"Yes": 0.9546397354890888, "No": 0.045360173255692256}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.63384033107066, "res": {"Yes": 0.63384033107066, "No": 0.3661591880911727}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8014616867571749, "res": {"Yes": 0.8014616867571749, "No": 0.19853692023578123}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9628368692768108, "res": {"Yes": 0.9628368692768108, "No": 0.03716272634476429}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8821038642392965, "res": {"Yes": 0.8821038642392965, "No": 0.11789538683288979}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.570667671582196, "res": {"Yes": 0.570667671582196, "No": 0.4293320414652439}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9307767056755518, "res": {"Yes": 0.9307767056755518, "No": 0.06922300703575479}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937759501460772, "res": {"Yes": 0.9937759501460772, "No": 0.00622384373042984}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9661974751891997, "res": {"Yes": 0.9661974751891997, "No": 0.033802392330128314}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9268754375684669, "res": {"Yes": 0.9268754375684669, "No": 0.07312436187236163}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920192040135679, "res": {"Yes": 0.9920192040135679, "No": 0.007980705203971051}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.006285800992204353, "res": {"No": 0.9937141395024326, "Yes": 0.006285800992204353}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9213649289404355, "res": {"Yes": 0.9213649289404355, "No": 0.07863489667677992}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9330803796719918, "res": {"Yes": 0.9330803796719918, "No": 0.06691942468209938}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9441080005673446, "res": {"Yes": 0.9441080005673446, "No": 0.05589194532445663}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8701683937748671, "res": {"Yes": 0.8701683937748671, "No": 0.12983132123599586}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.999607653787913, "res": {"Yes": 0.999607653787913, "No": 0.0003922481040058139}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968273299880879, "res": {"Yes": 0.9968273299880879, "No": 0.003172610590124492}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974025146182425, "res": {"Yes": 0.9974025146182425, "No": 0.0025973655438689333}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923407506899937, "res": {"Yes": 0.9923407506899937, "No": 0.007659060139122907}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976096130359473, "res": {"Yes": 0.9976096130359473, "No": 0.0023903822227229195}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8760641501168384, "res": {"Yes": 0.8760641501168384, "No": 0.12393562517469985}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8928353175051538, "res": {"Yes": 0.8928353175051538, "No": 0.10716449377442293}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46372843847305806, "res": {"No": 0.5362707043364585, "Yes": 0.46372843847305806}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7095523505313068, "res": {"Yes": 0.7095523505313068, "No": 0.290447512173842}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5736660020390568, "res": {"Yes": 0.5736660020390568, "No": 0.42633351498181393}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9691426793688429, "res": {"Yes": 0.9691426793688429, "No": 0.030857118799482178}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8721947873763578, "res": {"Yes": 0.8721947873763578, "No": 0.1278052062995901}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4438413500992741, "res": {"No": 0.5561583795440874, "Yes": 0.4438413500992741}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9374850199361555, "res": {"Yes": 0.9374850199361555, "No": 0.06251476563390321}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9419354511018901, "res": {"Yes": 0.9419354511018901, "No": 0.05806437326628701}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9852089153474035, "res": {"Yes": 0.9852089153474035, "No": 0.014790961414663668}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861823887871095, "res": {"Yes": 0.9861823887871095, "No": 0.013817539715101189}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994044916855028, "res": {"Yes": 0.9994044916855028, "No": 0.0005954564521824551}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991793911837271, "res": {"Yes": 0.9991793911837271, "No": 0.0008205604299449305}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9864549224383825, "res": {"Yes": 0.9864549224383825, "No": 0.01354503797271682}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9062699855823785, "res": {"Yes": 0.9062699855823785, "No": 0.0937298097492645}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.27933438563095586, "res": {"No": 0.7206653762789097, "Yes": 0.27933438563095586}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9742918895229425, "res": {"Yes": 0.9742918895229425, "No": 0.02570794990731919}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9718818037460694, "res": {"Yes": 0.9718818037460694, "No": 0.02811801232480878}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7457275798012402, "res": {"Yes": 0.7457275798012402, "No": 0.2542723353315928}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8830189479512257, "res": {"Yes": 0.8830189479512257, "No": 0.11698093572293217}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966872150211169, "res": {"Yes": 0.9966872150211169, "No": 0.003312778552117436}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961622925940449, "res": {"Yes": 0.9961622925940449, "No": 0.003837657978040748}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.653100693640588, "res": {"Yes": 0.653100693640588, "No": 0.34689886390659197}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580201580195756, "res": {"Yes": 0.7580201580195756, "No": 0.24197946785481866}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9455292786962853, "res": {"Yes": 0.9455292786962853, "No": 0.054470582155950506}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9383639301389081, "res": {"Yes": 0.9383639301389081, "No": 0.061635701902060976}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957548167734219, "res": {"Yes": 0.9957548167734219, "No": 0.004245126329763645}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9504053701303898, "res": {"Yes": 0.9504053701303898, "No": 0.04959440767974853}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981944825112931, "res": {"Yes": 0.9981944825112931, "No": 0.0018054747543155313}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9251444513873381, "res": {"Yes": 0.9251444513873381, "No": 0.07485531963754607}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8213519884865913, "res": {"Yes": 0.8213519884865913, "No": 0.17864776590866635}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9822328211316446, "res": {"Yes": 0.9822328211316446, "No": 0.017767139914403228}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972280975356793, "res": {"Yes": 0.9972280975356793, "No": 0.0027718630600808243}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9911327440759953, "res": {"Yes": 0.9911327440759953, "No": 0.008867213330032897}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.953230285320403, "res": {"Yes": 0.953230285320403, "No": 0.04676948587164372}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9527253557518714, "res": {"Yes": 0.9527253557518714, "No": 0.04727444047709944}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982461560848642, "res": {"Yes": 0.9982461560848642, "No": 0.0017536727078861373}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982038607921505, "res": {"Yes": 0.9982038607921505, "No": 0.00179616310055282}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9308997243230289, "res": {"Yes": 0.9308997243230289, "No": 0.06909995496122293}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9706782607024572, "res": {"Yes": 0.9706782607024572, "No": 0.029321677209544667}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.986776801914099, "res": {"Yes": 0.986776801914099, "No": 0.013223069307312364}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994467395243462, "res": {"Yes": 0.994467395243462, "No": 0.005532546949210358}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9241894055365111, "res": {"Yes": 0.9241894055365111, "No": 0.0758100052243648}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99714037274561, "res": {"Yes": 0.99714037274561, "No": 0.0028595557453775238}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7620907773579056, "res": {"Yes": 0.7620907773579056, "No": 0.2379091026262}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8598251451498602, "res": {"Yes": 0.8598251451498602, "No": 0.14017478090277752}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940740147286966, "res": {"Yes": 0.9940740147286966, "No": 0.00592590239858086}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8451586988237825, "res": {"Yes": 0.8451586988237825, "No": 0.15484116955316024}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6743351071576997, "res": {"Yes": 0.6743351071576997, "No": 0.3256644897901063}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9982525672415645, "res": {"Yes": 0.9982525672415645, "No": 0.0017473996681692601}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975542148183264, "res": {"Yes": 0.9975542148183264, "No": 0.002445691680873663}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990682513475985, "res": {"Yes": 0.9990682513475985, "No": 0.0009316874067694139}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997527570895672, "res": {"Yes": 0.9997527570895672, "No": 0.00024713050513482945}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964744546805184, "res": {"Yes": 0.9964744546805184, "No": 0.003525425996319374}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9974259973405185, "res": {"Yes": 0.9974259973405185, "No": 0.0025739412357571154}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9453815966663089, "res": {"Yes": 0.9453815966663089, "No": 0.0546183266292296}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9057247744954722, "res": {"Yes": 0.9057247744954722, "No": 0.09427496566860695}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9290861967749632, "res": {"Yes": 0.9290861967749632, "No": 0.0709136456949673}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9638758335874247, "res": {"Yes": 0.9638758335874247, "No": 0.03612389215533963}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.835276651369428, "res": {"Yes": 0.835276651369428, "No": 0.1647231159162466}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8389951694839328, "res": {"Yes": 0.8389951694839328, "No": 0.1610045908190765}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6941252703064641, "res": {"Yes": 0.6941252703064641, "No": 0.30587468103962007}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7511620286170828, "res": {"Yes": 0.7511620286170828, "No": 0.24883777521494607}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6078085691283681, "res": {"Yes": 0.6078085691283681, "No": 0.3921912600066261}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8221459376204632, "res": {"Yes": 0.8221459376204632, "No": 0.1778537670074112}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8488784935931517, "res": {"Yes": 0.8488784935931517, "No": 0.15112120228359854}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9715396119369965, "res": {"Yes": 0.9715396119369965, "No": 0.028460062222747478}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9849826472207515, "res": {"Yes": 0.9849826472207515, "No": 0.015017282116276674}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.717941636104784, "res": {"Yes": 0.717941636104784, "No": 0.28205770537170494}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6383473232226287, "res": {"Yes": 0.6383473232226287, "No": 0.36165230939102183}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6395845078107139, "res": {"Yes": 0.6395845078107139, "No": 0.36041501214321275}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922204485748028, "res": {"Yes": 0.9922204485748028, "No": 0.007779466627644489}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8381628211988623, "res": {"Yes": 0.8381628211988623, "No": 0.16183698755812587}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7449412446159516, "res": {"Yes": 0.7449412446159516, "No": 0.25505869087816535}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5586946490858911, "res": {"Yes": 0.5586946490858911, "No": 0.4413051335432967}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9684964128134745, "res": {"Yes": 0.9684964128134745, "No": 0.03150344733286513}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9480990017064274, "res": {"Yes": 0.9480990017064274, "No": 0.05190080387547979}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2782046458376869, "res": {"No": 0.7217951183195833, "Yes": 0.2782046458376869}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8655385626627243, "res": {"Yes": 0.8655385626627243, "No": 0.13446123998793752}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8835477838191178, "res": {"Yes": 0.8835477838191178, "No": 0.11645198929179175}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9750625511660832, "res": {"Yes": 0.9750625511660832, "No": 0.024937277666521745}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9063816267950436, "res": {"Yes": 0.9063816267950436, "No": 0.0936180848455672}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9797705278414206, "res": {"Yes": 0.9797705278414206, "No": 0.020229341075175607}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925936892963646, "res": {"Yes": 0.9925936892963646, "No": 0.007406154020411779}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5178250526320207, "res": {"Yes": 0.5178250526320207, "No": 0.4821746674374003}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.851184670362058, "res": {"Yes": 0.851184670362058, "No": 0.14881530584671213}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6524334221567673, "res": {"Yes": 0.6524334221567673, "No": 0.3475664879669443}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.922425997313123, "res": {"Yes": 0.922425997313123, "No": 0.07757385722376213}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8872258382539144, "res": {"Yes": 0.8872258382539144, "No": 0.1127740296528524}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8797651694668758, "res": {"Yes": 0.8797651694668758, "No": 0.12023446109611895}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8203433310092116, "res": {"Yes": 0.8203433310092116, "No": 0.17965640740743252}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.794108323070017, "res": {"Yes": 0.794108323070017, "No": 0.20589107750620395}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8859766483398085, "res": {"Yes": 0.8859766483398085, "No": 0.11402314783677109}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9837048324021405, "res": {"Yes": 0.9837048324021405, "No": 0.016294999457256126}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8871091136292732, "res": {"Yes": 0.8871091136292732, "No": 0.11289044994754975}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9456612393512537, "res": {"Yes": 0.9456612393512537, "No": 0.05433851517891352}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953186159966814, "res": {"Yes": 0.9953186159966814, "No": 0.0046813595926163114}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9247767795475311, "res": {"Yes": 0.9247767795475311, "No": 0.07522307851219787}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9376058414156395, "res": {"Yes": 0.9376058414156395, "No": 0.062393916521624945}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6333967727576919, "res": {"Yes": 0.6333967727576919, "No": 0.3666029121267231}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8719956880344545, "res": {"Yes": 0.8719956880344545, "No": 0.12800386758643195}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6077865425449469, "res": {"Yes": 0.6077865425449469, "No": 0.3922129527051231}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8673474697483433, "res": {"Yes": 0.8673474697483433, "No": 0.13265228138716428}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8741592602158798, "res": {"Yes": 0.8741592602158798, "No": 0.12584046815488015}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6125550028644908, "res": {"Yes": 0.6125550028644908, "No": 0.38744492473742137}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9058540668771614, "res": {"Yes": 0.9058540668771614, "No": 0.09414574676427861}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9911244285071564, "res": {"Yes": 0.9911244285071564, "No": 0.008875563967551419}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9538021165971093, "res": {"Yes": 0.9538021165971093, "No": 0.04619784162774232}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997496560544253, "res": {"Yes": 0.997496560544253, "No": 0.002503438730577117}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9831803762418814, "res": {"Yes": 0.9831803762418814, "No": 0.016819648876700934}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995581040647816, "res": {"Yes": 0.9995581040647816, "No": 0.0004418431955625094}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973365799138141, "res": {"Yes": 0.9973365799138141, "No": 0.002663362118454815}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903192916823724, "res": {"Yes": 0.9903192916823724, "No": 0.00968049347665259}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920201434561989, "res": {"Yes": 0.9920201434561989, "No": 0.007979767526199232}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8669582742028765, "res": {"Yes": 0.8669582742028765, "No": 0.13304165557302775}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9040818325089501, "res": {"Yes": 0.9040818325089501, "No": 0.09591790592263404}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672319816559438, "res": {"Yes": 0.9672319816559438, "No": 0.032767972132960864}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9471073256101329, "res": {"Yes": 0.9471073256101329, "No": 0.052892501433746526}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9560010161623235, "res": {"Yes": 0.9560010161623235, "No": 0.04399890639640131}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9615911006266491, "res": {"Yes": 0.9615911006266491, "No": 0.038408657080216915}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8606247930190392, "res": {"Yes": 0.8606247930190392, "No": 0.1393748239371169}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883871018275753, "res": {"Yes": 0.9883871018275753, "No": 0.011612757508209913}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9879523130923256, "res": {"Yes": 0.9879523130923256, "No": 0.01204741698638231}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9662330869858108, "res": {"Yes": 0.9662330869858108, "No": 0.033766651687628386}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6540363683324768, "res": {"Yes": 0.6540363683324768, "No": 0.34596363866859686}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9834295683365402, "res": {"Yes": 0.9834295683365402, "No": 0.016570408414831733}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9838315122295145, "res": {"Yes": 0.9838315122295145, "No": 0.0161684817954835}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6583975834257668, "res": {"Yes": 0.6583975834257668, "No": 0.3416021237569357}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9821787713288974, "res": {"Yes": 0.9821787713288974, "No": 0.017821275190285236}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.42726036392315336, "res": {"No": 0.5727394945145448, "Yes": 0.42726036392315336}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9554792384544403, "res": {"Yes": 0.9554792384544403, "No": 0.044520590008539335}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9674493914743342, "res": {"Yes": 0.9674493914743342, "No": 0.03255055369087126}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9187757606917578, "res": {"Yes": 0.9187757606917578, "No": 0.08122404134166322}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9615488857808142, "res": {"Yes": 0.9615488857808142, "No": 0.038450806805986705}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8995735601522701, "res": {"Yes": 0.8995735601522701, "No": 0.1004262451508614}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9085175988565147, "res": {"Yes": 0.9085175988565147, "No": 0.09148222389005597}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9660870934333372, "res": {"Yes": 0.9660870934333372, "No": 0.03391282559537673}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9664227665276168, "res": {"Yes": 0.9664227665276168, "No": 0.033577119877353144}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7754971278367977, "res": {"Yes": 0.7754971278367977, "No": 0.2245027325856916}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.24523817446172985, "res": {"No": 0.7547617216567896, "Yes": 0.24523817446172985}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9409027568122632, "res": {"Yes": 0.9409027568122632, "No": 0.05909708375704421}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9802214965183714, "res": {"Yes": 0.9802214965183714, "No": 0.01977850375473226}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9123014830641007, "res": {"Yes": 0.9123014830641007, "No": 0.08769833192713451}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.993992625169168, "res": {"Yes": 0.993992625169168, "No": 0.006007399617638233}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9713521996219889, "res": {"Yes": 0.9713521996219889, "No": 0.028647568744447485}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9850816567423925, "res": {"Yes": 0.9850816567423925, "No": 0.014918125930070166}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969532654445175, "res": {"Yes": 0.9969532654445175, "No": 0.003046721148208284}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988635264757323, "res": {"Yes": 0.9988635264757323, "No": 0.0011364623991463815}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926401920103619, "res": {"Yes": 0.9926401920103619, "No": 0.007359605118929871}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8532877069705592, "res": {"Yes": 0.8532877069705592, "No": 0.14671220504301571}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3443795798993929, "res": {"No": 0.6556200171585119, "Yes": 0.3443795798993929}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9890035360367926, "res": {"Yes": 0.9890035360367926, "No": 0.010996312386273015}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9703266166166999, "res": {"Yes": 0.9703266166166999, "No": 0.029673196956862446}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9554198190482664, "res": {"Yes": 0.9554198190482664, "No": 0.04458001564867113}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5850890330983487, "res": {"Yes": 0.5850890330983487, "No": 0.4149108255640391}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.851814924938511, "res": {"Yes": 0.851814924938511, "No": 0.1481848200764888}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9366908997855032, "res": {"Yes": 0.9366908997855032, "No": 0.06330884045163543}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9412931628254136, "res": {"Yes": 0.9412931628254136, "No": 0.05870673633758988}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9022857888703848, "res": {"Yes": 0.9022857888703848, "No": 0.09771414016065007}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9991664196812675, "res": {"Yes": 0.9991664196812675, "No": 0.0008335828171512628}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960353863149438, "res": {"Yes": 0.9960353863149438, "No": 0.003964521944566578}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9754637071186003, "res": {"Yes": 0.9754637071186003, "No": 0.02453613868203962}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9895387960271835, "res": {"Yes": 0.9895387960271835, "No": 0.010460977222457986}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997728026647336, "res": {"Yes": 0.997728026647336, "No": 0.0022718277392266327}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9912982344400509, "res": {"Yes": 0.9912982344400509, "No": 0.008701736287133078}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894317604362857, "res": {"Yes": 0.9894317604362857, "No": 0.010568176611377746}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7579727605009934, "res": {"Yes": 0.7579727605009934, "No": 0.2420270941003613}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9242347288962783, "res": {"Yes": 0.9242347288962783, "No": 0.07576516171253485}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9244739887771838, "res": {"Yes": 0.9244739887771838, "No": 0.07552590265824669}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9237167093147363, "res": {"Yes": 0.9237167093147363, "No": 0.07628312741061254}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9614353739430789, "res": {"Yes": 0.9614353739430789, "No": 0.0385643655953946}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4814375764854774, "res": {"No": 0.5185623021983303, "Yes": 0.4814375764854774}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8774673344789699, "res": {"Yes": 0.8774673344789699, "No": 0.12253263209774638}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9058530976138284, "res": {"Yes": 0.9058530976138284, "No": 0.0941467070557931}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9441637914456952, "res": {"Yes": 0.9441637914456952, "No": 0.05583600314533272}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9547724033536943, "res": {"Yes": 0.9547724033536943, "No": 0.04522755291932022}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9431379335956669, "res": {"Yes": 0.9431379335956669, "No": 0.05686195446868289}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9551794284075197, "res": {"Yes": 0.9551794284075197, "No": 0.044820457641847736}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9730878822111966, "res": {"Yes": 0.9730878822111966, "No": 0.026912002561796444}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6944709266076415, "res": {"Yes": 0.6944709266076415, "No": 0.30552890178022185}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8569622760196979, "res": {"Yes": 0.8569622760196979, "No": 0.1430375064850778}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928328722432775, "res": {"Yes": 0.9928328722432775, "No": 0.007167020225090069}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9808643757438157, "res": {"Yes": 0.9808643757438157, "No": 0.01913563452911953}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9763438356341994, "res": {"Yes": 0.9763438356341994, "No": 0.02365614185201977}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.961795952887396, "res": {"Yes": 0.961795952887396, "No": 0.03820390666046974}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7327536486579384, "res": {"Yes": 0.7327536486579384, "No": 0.2672459188769849}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895899327552664, "res": {"Yes": 0.9895899327552664, "No": 0.010409920846359182}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9706644529024069, "res": {"Yes": 0.9706644529024069, "No": 0.029335467504012756}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955211903169621, "res": {"Yes": 0.9955211903169621, "No": 0.004478780855760701}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9938198604108562, "res": {"Yes": 0.9938198604108562, "No": 0.006179901167217778}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9703025945380108, "res": {"Yes": 0.9703025945380108, "No": 0.02969703410335226}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9649168125214203, "res": {"Yes": 0.9649168125214203, "No": 0.035082926196691164}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9740936598036453, "res": {"Yes": 0.9740936598036453, "No": 0.02590609007205632}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9681368342493579, "res": {"Yes": 0.9681368342493579, "No": 0.03186284759882694}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8743160722256493, "res": {"Yes": 0.8743160722256493, "No": 0.12568311502177287}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8522885719727631, "res": {"Yes": 0.8522885719727631, "No": 0.14771115021185374}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880186469314214, "res": {"Yes": 0.9880186469314214, "No": 0.011981177527678572}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9811484330132636, "res": {"Yes": 0.9811484330132636, "No": 0.01885153379430882}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8962804435020054, "res": {"Yes": 0.8962804435020054, "No": 0.10371915139875829}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8188192872208243, "res": {"Yes": 0.8188192872208243, "No": 0.18118053751493088}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9311172218664817, "res": {"Yes": 0.9311172218664817, "No": 0.06888263945383026}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9713178881829017, "res": {"Yes": 0.9713178881829017, "No": 0.028681986538826847}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843520416582421, "res": {"Yes": 0.9843520416582421, "No": 0.015647767874433754}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885654234391089, "res": {"Yes": 0.9885654234391089, "No": 0.011434438000797858}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.38967785266798305, "res": {"No": 0.6103215924491211, "Yes": 0.38967785266798305}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9506376908985925, "res": {"Yes": 0.9506376908985925, "No": 0.049362078069386243}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973449952759116, "res": {"Yes": 0.9973449952759116, "No": 0.00265496581520534}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984296371234113, "res": {"Yes": 0.984296371234113, "No": 0.015703573202613046}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99844385704802, "res": {"Yes": 0.99844385704802, "No": 0.0015560869100466385}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9974078502372659, "res": {"Yes": 0.9974078502372659, "No": 0.002592073874328269}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998621429831519, "res": {"Yes": 0.9998621429831519, "No": 0.00013782841720491837}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997215177033536, "res": {"Yes": 0.997215177033536, "No": 0.002784789477561307}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995008271282537, "res": {"Yes": 0.9995008271282537, "No": 0.000499114015667805}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999001571046336, "res": {"Yes": 0.9999001571046336, "No": 9.979694363655761e-05}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9535243564594547, "res": {"Yes": 0.9535243564594547, "No": 0.046475476156225104}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.899297555761379, "res": {"Yes": 0.899297555761379, "No": 0.10070228382714001}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.951448422719962, "res": {"Yes": 0.951448422719962, "No": 0.04855147517749615}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8808553455504469, "res": {"Yes": 0.8808553455504469, "No": 0.11914443289842573}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7229644703563252, "res": {"Yes": 0.7229644703563252, "No": 0.2770347581447144}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5946621201661672, "res": {"Yes": 0.5946621201661672, "No": 0.40533718609529296}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9109414625896816, "res": {"Yes": 0.9109414625896816, "No": 0.08905827039042435}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957438326626211, "res": {"Yes": 0.9957438326626211, "No": 0.004256040546861674}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955864171106058, "res": {"Yes": 0.9955864171106058, "No": 0.004413470224440286}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.989912238265157, "res": {"Yes": 0.989912238265157, "No": 0.01008758014940039}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.979975408499716, "res": {"Yes": 0.979975408499716, "No": 0.020024637270990218}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.919796186403339, "res": {"Yes": 0.919796186403339, "No": 0.08020367036558027}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9853313504329446, "res": {"Yes": 0.9853313504329446, "No": 0.014668616490998302}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7956158819275627, "res": {"Yes": 0.7956158819275627, "No": 0.20438406164282835}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817174972734993, "res": {"Yes": 0.9817174972734993, "No": 0.018282481001186304}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.964160108988846, "res": {"Yes": 0.964160108988846, "No": 0.03583953733996088}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969782606722515, "res": {"Yes": 0.9969782606722515, "No": 0.003021644679418233}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9499354671601078, "res": {"Yes": 0.9499354671601078, "No": 0.050064355057931144}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.953161731463509, "res": {"Yes": 0.953161731463509, "No": 0.04683824541829603}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.987303494420985, "res": {"Yes": 0.987303494420985, "No": 0.012696232285068842}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9840677555836317, "res": {"Yes": 0.9840677555836317, "No": 0.01593208943712579}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9552018352693603, "res": {"Yes": 0.9552018352693603, "No": 0.044797914140901285}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8225421511968349, "res": {"Yes": 0.8225421511968349, "No": 0.17745752401992282}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9619786018143943, "res": {"Yes": 0.9619786018143943, "No": 0.038021271483129185}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8690598185585947, "res": {"Yes": 0.8690598185585947, "No": 0.13093987900746185}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9855881139356322, "res": {"Yes": 0.9855881139356322, "No": 0.014411770650644667}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9905834559571693, "res": {"Yes": 0.9905834559571693, "No": 0.009416469955098903}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9865515450042804, "res": {"Yes": 0.9865515450042804, "No": 0.013448280835846382}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.942882044064728, "res": {"Yes": 0.942882044064728, "No": 0.05711780440370875}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956406470845108, "res": {"Yes": 0.9956406470845108, "No": 0.004359345271432675}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9946683241075677, "res": {"Yes": 0.9946683241075677, "No": 0.0053317130217045115}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936711815725087, "res": {"Yes": 0.9936711815725087, "No": 0.006328740726363825}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894377207911631, "res": {"Yes": 0.9894377207911631, "No": 0.010562228402523452}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9788831892154197, "res": {"Yes": 0.9788831892154197, "No": 0.02111683055128404}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945059517901353, "res": {"Yes": 0.9945059517901353, "No": 0.005493979075091794}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8868805528019149, "res": {"Yes": 0.8868805528019149, "No": 0.11311910373499849}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7445806388226256, "res": {"Yes": 0.7445806388226256, "No": 0.25541901150455204}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.875795160881045, "res": {"Yes": 0.875795160881045, "No": 0.12420469602653522}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.935787563815345, "res": {"Yes": 0.935787563815345, "No": 0.064212203543692}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8934245063553876, "res": {"Yes": 0.8934245063553876, "No": 0.10657537475362061}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9996337458174444, "res": {"Yes": 0.9996337458174444, "No": 0.00036617635366355977}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984044059159483, "res": {"Yes": 0.9984044059159483, "No": 0.0015955528314631345}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969780228929647, "res": {"Yes": 0.9969780228929647, "No": 0.00302194504582733}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999937345628867, "res": {"Yes": 0.999937345628867, "No": 6.252947577475572e-05}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976349997303009, "res": {"Yes": 0.9976349997303009, "No": 0.0023649823974609464}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9853711674919626, "res": {"Yes": 0.9853711674919626, "No": 0.014628810101981476}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8907876122640566, "res": {"Yes": 0.8907876122640566, "No": 0.1092123310874829}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972531144132435, "res": {"Yes": 0.9972531144132435, "No": 0.0027468793320745205}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975224236711101, "res": {"Yes": 0.9975224236711101, "No": 0.0024775874368786165}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9731579840410228, "res": {"Yes": 0.9731579840410228, "No": 0.02684186187159849}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9422673487111375, "res": {"Yes": 0.9422673487111375, "No": 0.05773249456195811}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9840406242255636, "res": {"Yes": 0.9840406242255636, "No": 0.01595920340776951}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9493518218924153, "res": {"Yes": 0.9493518218924153, "No": 0.050648108171090805}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9622273246710666, "res": {"Yes": 0.9622273246710666, "No": 0.037772441616991004}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8521781054875516, "res": {"Yes": 0.8521781054875516, "No": 0.1478216351385097}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9922589352702964, "res": {"Yes": 0.9922589352702964, "No": 0.0077407553941444595}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.996614744342482, "res": {"Yes": 0.996614744342482, "No": 0.0033850825023169816}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996245705712715, "res": {"Yes": 0.9996245705712715, "No": 0.0003753073003402863}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994616477336113, "res": {"Yes": 0.9994616477336113, "No": 0.0005381388207246903}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9616583441585844, "res": {"Yes": 0.9616583441585844, "No": 0.03834121698323449}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9990423078800988, "res": {"Yes": 0.9990423078800988, "No": 0.0009576473101714003}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924850485177548, "res": {"Yes": 0.9924850485177548, "No": 0.0075147313869663645}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9809575151998402, "res": {"Yes": 0.9809575151998402, "No": 0.019042346817363397}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9754928583115988, "res": {"Yes": 0.9754928583115988, "No": 0.02450693598667952}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9456714799722624, "res": {"Yes": 0.9456714799722624, "No": 0.05432831679676253}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9174337029768769, "res": {"Yes": 0.9174337029768769, "No": 0.08256604173472398}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9761194566011802, "res": {"Yes": 0.9761194566011802, "No": 0.023880525137904722}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9808481994619209, "res": {"Yes": 0.9808481994619209, "No": 0.019151745857835398}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939244309667301, "res": {"Yes": 0.9939244309667301, "No": 0.006075408799018651}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836142934250551, "res": {"Yes": 0.9836142934250551, "No": 0.016385498059651497}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8780085564563767, "res": {"Yes": 0.8780085564563767, "No": 0.12199100091027983}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9091575295357776, "res": {"Yes": 0.9091575295357776, "No": 0.09084223121629693}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9334359977004877, "res": {"Yes": 0.9334359977004877, "No": 0.06656346004307889}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8531767953103897, "res": {"Yes": 0.8531767953103897, "No": 0.14682301458316088}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9789139187988246, "res": {"Yes": 0.9789139187988246, "No": 0.021085942347314934}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.74260626552023, "res": {"Yes": 0.74260626552023, "No": 0.2573937668307686}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4790703614398882, "res": {"No": 0.5209293453274888, "Yes": 0.4790703614398882}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32844443598708684, "res": {"No": 0.6715555239425662, "Yes": 0.32844443598708684}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4106639248831377, "res": {"No": 0.5893360233235723, "Yes": 0.4106639248831377}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9300232027058359, "res": {"Yes": 0.9300232027058359, "No": 0.06997667547211218}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8290549096774515, "res": {"Yes": 0.8290549096774515, "No": 0.17094489942330615}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7261310578461168, "res": {"Yes": 0.7261310578461168, "No": 0.27386853144097995}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9009437355446303, "res": {"Yes": 0.9009437355446303, "No": 0.0990561418686259}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7811901413022825, "res": {"Yes": 0.7811901413022825, "No": 0.21880945420967654}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.939839343025643, "res": {"Yes": 0.939839343025643, "No": 0.06016058953121615}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8861541086271526, "res": {"Yes": 0.8861541086271526, "No": 0.11384560393475576}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6587679835450911, "res": {"Yes": 0.6587679835450911, "No": 0.3412315158354378}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9453433444646676, "res": {"Yes": 0.9453433444646676, "No": 0.054656337819725065}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9163411492180886, "res": {"Yes": 0.9163411492180886, "No": 0.08365877129193304}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8668329297680037, "res": {"Yes": 0.8668329297680037, "No": 0.13316660040879813}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.22496258038694789, "res": {"No": 0.7750369913450968, "Yes": 0.22496258038694789}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7807674990044948, "res": {"Yes": 0.7807674990044948, "No": 0.21923231770283674}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9569365860455289, "res": {"Yes": 0.9569365860455289, "No": 0.04306326782018836}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8984282116025625, "res": {"Yes": 0.8984282116025625, "No": 0.10157167216379535}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9408626554504512, "res": {"Yes": 0.9408626554504512, "No": 0.05913718290704591}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9994442608402433, "res": {"Yes": 0.9994442608402433, "No": 0.0005556797604844731}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9916628087640998, "res": {"Yes": 0.9916628087640998, "No": 0.008337165403999635}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983826892167581, "res": {"Yes": 0.983826892167581, "No": 0.01617303386378248}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9951084520779762, "res": {"Yes": 0.9951084520779762, "No": 0.004891516015123923}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9537378134260927, "res": {"Yes": 0.9537378134260927, "No": 0.04626210127771526}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9986900223707568, "res": {"Yes": 0.9986900223707568, "No": 0.0013097748850619722}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855370657357404, "res": {"Yes": 0.9855370657357404, "No": 0.014462854139797423}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985159975222789, "res": {"Yes": 0.9985159975222789, "No": 0.0014838093690526684}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957676998330056, "res": {"Yes": 0.9957676998330056, "No": 0.004232310151055902}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974473369992934, "res": {"Yes": 0.9974473369992934, "No": 0.0025525332029456703}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8478890237275871, "res": {"Yes": 0.8478890237275871, "No": 0.15211093329787542}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9606956658755595, "res": {"Yes": 0.9606956658755595, "No": 0.03930415659199894}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9736700114730049, "res": {"Yes": 0.9736700114730049, "No": 0.026329910622279986}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9166470474504499, "res": {"Yes": 0.9166470474504499, "No": 0.08335280642385318}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9352510809194423, "res": {"Yes": 0.9352510809194423, "No": 0.06474882016231054}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2786867465956818, "res": {"No": 0.721313127398977, "Yes": 0.2786867465956818}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6786497205578867, "res": {"Yes": 0.6786497205578867, "No": 0.32135013084244857}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9433006313757379, "res": {"Yes": 0.9433006313757379, "No": 0.05669910801670227}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7889980575919023, "res": {"Yes": 0.7889980575919023, "No": 0.2110017820093059}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9834842170691398, "res": {"Yes": 0.9834842170691398, "No": 0.016515672506794116}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9666556348161083, "res": {"Yes": 0.9666556348161083, "No": 0.033344324533770904}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980664481245344, "res": {"Yes": 0.9980664481245344, "No": 0.0019335099515894794}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9854461748291579, "res": {"Yes": 0.9854461748291579, "No": 0.014553714876212034}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998346147876424, "res": {"Yes": 0.9998346147876424, "No": 0.00016535101170310075}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907625904434094, "res": {"Yes": 0.9907625904434094, "No": 0.009237310565830855}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9938487145011053, "res": {"Yes": 0.9938487145011053, "No": 0.006151245483656017}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9483132450862728, "res": {"Yes": 0.9483132450862728, "No": 0.05168648208664417}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960242697256364, "res": {"Yes": 0.9960242697256364, "No": 0.00397563816145241}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941879407468374, "res": {"Yes": 0.9941879407468374, "No": 0.005812020535425349}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.976563435173608, "res": {"Yes": 0.976563435173608, "No": 0.023436351010621893}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.980894419098877, "res": {"Yes": 0.980894419098877, "No": 0.019105584585324496}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9854534661723796, "res": {"Yes": 0.9854534661723796, "No": 0.014546444201617732}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9750577879971548, "res": {"Yes": 0.9750577879971548, "No": 0.024942105990844694}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9414534259801588, "res": {"Yes": 0.9414534259801588, "No": 0.05854652744032556}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934549944662345, "res": {"Yes": 0.9934549944662345, "No": 0.006544946852330484}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.41183306243834394, "res": {"No": 0.5881664634756584, "Yes": 0.41183306243834394}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7758654523336612, "res": {"Yes": 0.7758654523336612, "No": 0.22413433434098687}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19164125849772948, "res": {"No": 0.8083586461049096, "Yes": 0.19164125849772948}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5789028122435071, "res": {"Yes": 0.5789028122435071, "No": 0.42109690232157637}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6133477879592846, "res": {"Yes": 0.6133477879592846, "No": 0.3866521219128328}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5224000527728067, "res": {"Yes": 0.5224000527728067, "No": 0.47759974061729893}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8780312444906037, "res": {"Yes": 0.8780312444906037, "No": 0.12196855663104934}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9593552641386847, "res": {"Yes": 0.9593552641386847, "No": 0.040644571270005654}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7509138406234668, "res": {"Yes": 0.7509138406234668, "No": 0.2490859901914405}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9839741407507984, "res": {"Yes": 0.9839741407507984, "No": 0.01602583774760777}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9693789774035464, "res": {"Yes": 0.9693789774035464, "No": 0.030620832530922275}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9815812531752812, "res": {"Yes": 0.9815812531752812, "No": 0.018418748369540442}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8449479256277399, "res": {"Yes": 0.8449479256277399, "No": 0.15505195882657535}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9001575602822356, "res": {"Yes": 0.9001575602822356, "No": 0.09984223388119491}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9515206185108981, "res": {"Yes": 0.9515206185108981, "No": 0.048479284305760685}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9760092882155834, "res": {"Yes": 0.9760092882155834, "No": 0.023990676578651938}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9751463136118783, "res": {"Yes": 0.9751463136118783, "No": 0.0248535668879868}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7659094482516777, "res": {"Yes": 0.7659094482516777, "No": 0.23409049678289748}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9305725752234326, "res": {"Yes": 0.9305725752234326, "No": 0.06942729424182763}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9845435856687423, "res": {"Yes": 0.9845435856687423, "No": 0.015456386428199709}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9790401657200654, "res": {"Yes": 0.9790401657200654, "No": 0.020959618940797967}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9611038080326947, "res": {"Yes": 0.9611038080326947, "No": 0.038896130745559684}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732528316667918, "res": {"Yes": 0.9732528316667918, "No": 0.026746862559878862}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7977633096314513, "res": {"Yes": 0.7977633096314513, "No": 0.2022362319042055}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9748872458049332, "res": {"Yes": 0.9748872458049332, "No": 0.02511234301826239}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9907182340044212, "res": {"Yes": 0.9907182340044212, "No": 0.009281649501248166}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8971323144979876, "res": {"Yes": 0.8971323144979876, "No": 0.10286765704302021}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8557936457080945, "res": {"Yes": 0.8557936457080945, "No": 0.1442062662218774}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966030232254, "res": {"Yes": 0.9966030232254, "No": 0.003396969052804102}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9520620925984693, "res": {"Yes": 0.9520620925984693, "No": 0.0479377252998123}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9790489575402287, "res": {"Yes": 0.9790489575402287, "No": 0.02095093088394375}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.913116783260813, "res": {"Yes": 0.913116783260813, "No": 0.08688288644450853}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9593542760032715, "res": {"Yes": 0.9593542760032715, "No": 0.04064546546040965}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821490981921027, "res": {"Yes": 0.9821490981921027, "No": 0.01785081167145684}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9742053187662838, "res": {"Yes": 0.9742053187662838, "No": 0.025794365561347357}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8054500656749156, "res": {"Yes": 0.8054500656749156, "No": 0.19454933455289689}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9761358125958252, "res": {"Yes": 0.9761358125958252, "No": 0.02386398882615449}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9820282531628017, "res": {"Yes": 0.9820282531628017, "No": 0.017971525325746973}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7532550098611132, "res": {"Yes": 0.7532550098611132, "No": 0.24674444835241813}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9237069419858919, "res": {"Yes": 0.9237069419858919, "No": 0.07629278546588472}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8090088197043048, "res": {"Yes": 0.8090088197043048, "No": 0.19099082284916039}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9570734865727445, "res": {"Yes": 0.9570734865727445, "No": 0.04292640247703569}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9227031043349124, "res": {"Yes": 0.9227031043349124, "No": 0.0772967011683432}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9295128846515855, "res": {"Yes": 0.9295128846515855, "No": 0.07048680851036167}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9793345965922995, "res": {"Yes": 0.9793345965922995, "No": 0.02066537674743033}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9438716434529546, "res": {"Yes": 0.9438716434529546, "No": 0.05612811687166289}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9910587320195995, "res": {"Yes": 0.9910587320195995, "No": 0.008941103172841365}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923209159812977, "res": {"Yes": 0.9923209159812977, "No": 0.007678884421428227}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9840781424736086, "res": {"Yes": 0.9840781424736086, "No": 0.015921864318321495}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9707528436904425, "res": {"Yes": 0.9707528436904425, "No": 0.029246955388616647}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9499398805705406, "res": {"Yes": 0.9499398805705406, "No": 0.05006000966050509}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5165923328452139, "res": {"Yes": 0.5165923328452139, "No": 0.483407135381207}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9460214999203375, "res": {"Yes": 0.9460214999203375, "No": 0.05397837368225466}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.975357768733869, "res": {"Yes": 0.975357768733869, "No": 0.024642179265772453}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9684221687873595, "res": {"Yes": 0.9684221687873595, "No": 0.031577798008256726}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.920633434588764, "res": {"Yes": 0.920633434588764, "No": 0.07936625508469518}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9283286594549264, "res": {"Yes": 0.9283286594549264, "No": 0.0716713638197081}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9771943543396133, "res": {"Yes": 0.9771943543396133, "No": 0.02280543942963892}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9630651368110135, "res": {"Yes": 0.9630651368110135, "No": 0.0369347882892398}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6207219393783899, "res": {"Yes": 0.6207219393783899, "No": 0.37927762206911897}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.45743389340345786, "res": {"No": 0.5425659142643794, "Yes": 0.45743389340345786}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.16265413560168643, "res": {"No": 0.8373456590227637, "Yes": 0.16265413560168643}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9443971042575634, "res": {"Yes": 0.9443971042575634, "No": 0.05560278439126079}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7958482993549449, "res": {"Yes": 0.7958482993549449, "No": 0.20415152321304164}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4848630010544883, "res": {"No": 0.5151367394838574, "Yes": 0.4848630010544883}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7883386835524101, "res": {"Yes": 0.7883386835524101, "No": 0.21166107212761812}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889846432826984, "res": {"Yes": 0.9889846432826984, "No": 0.011015080395780447}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939338435750529, "res": {"Yes": 0.9939338435750529, "No": 0.006066156927725553}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.721132035621937, "res": {"Yes": 0.721132035621937, "No": 0.27886772877202975}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9486229654463592, "res": {"Yes": 0.9486229654463592, "No": 0.05137678819788479}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2711889957180535, "res": {"No": 0.7288106354825217, "Yes": 0.2711889957180535}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8666251574826609, "res": {"Yes": 0.8666251574826609, "No": 0.13337462246395365}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6678432758255444, "res": {"Yes": 0.6678432758255444, "No": 0.3321561796653508}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9449935054988494, "res": {"Yes": 0.9449935054988494, "No": 0.055006270013596016}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9092436581079659, "res": {"Yes": 0.9092436581079659, "No": 0.0907562534201699}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.32994556258267466, "res": {"No": 0.6700543838271192, "Yes": 0.32994556258267466}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9851996687125207, "res": {"Yes": 0.9851996687125207, "No": 0.014800303376647932}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8569988005302336, "res": {"Yes": 0.8569988005302336, "No": 0.14300103657103574}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9430558737910313, "res": {"Yes": 0.9430558737910313, "No": 0.05694404272021914}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7812542327704709, "res": {"Yes": 0.7812542327704709, "No": 0.21874561492554273}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9832979234653455, "res": {"Yes": 0.9832979234653455, "No": 0.01670208528482361}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9845787138260005, "res": {"Yes": 0.9845787138260005, "No": 0.015421247695735402}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.988035988786904, "res": {"Yes": 0.988035988786904, "No": 0.011963890388899619}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9633889872313932, "res": {"Yes": 0.9633889872313932, "No": 0.03661090983753375}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9713627728482265, "res": {"Yes": 0.9713627728482265, "No": 0.028637059879444322}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8885385718549553, "res": {"Yes": 0.8885385718549553, "No": 0.11146125915298186}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.764091295673474, "res": {"Yes": 0.764091295673474, "No": 0.23590846720886502}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983655123118155, "res": {"Yes": 0.983655123118155, "No": 0.016344786516177653}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9810955309160089, "res": {"Yes": 0.9810955309160089, "No": 0.0189044563310074}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9770965909351574, "res": {"Yes": 0.9770965909351574, "No": 0.022903409340627776}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6310435319457627, "res": {"Yes": 0.6310435319457627, "No": 0.36895603071330735}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9636968883380852, "res": {"Yes": 0.9636968883380852, "No": 0.03630282924917252}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953025526791611, "res": {"Yes": 0.9953025526791611, "No": 0.004697295788502489}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9836747226417426, "res": {"Yes": 0.9836747226417426, "No": 0.01632510780771163}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933435897027405, "res": {"Yes": 0.9933435897027405, "No": 0.006656176888633697}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9550122866328802, "res": {"Yes": 0.9550122866328802, "No": 0.04498766220618716}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.902523621238644, "res": {"Yes": 0.902523621238644, "No": 0.0974762519880014}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9118347139835068, "res": {"Yes": 0.9118347139835068, "No": 0.08816519573729548}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789510506868989, "res": {"Yes": 0.9789510506868989, "No": 0.021048968972122033}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979141320523839, "res": {"Yes": 0.9979141320523839, "No": 0.002085795420166332}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9656527031126098, "res": {"Yes": 0.9656527031126098, "No": 0.03434717174821858}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9779623349597811, "res": {"Yes": 0.9779623349597811, "No": 0.022037524930115546}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727388699724473, "res": {"Yes": 0.9727388699724473, "No": 0.027260718384442488}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8449647909566508, "res": {"Yes": 0.8449647909566508, "No": 0.1550349505596126}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924070816571878, "res": {"Yes": 0.9924070816571878, "No": 0.007592862634389213}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9693397755410078, "res": {"Yes": 0.9693397755410078, "No": 0.030660150404178365}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969752970590786, "res": {"Yes": 0.9969752970590786, "No": 0.0030245317284335613}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923206798089478, "res": {"Yes": 0.9923206798089478, "No": 0.007679233818618125}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9876141904435224, "res": {"Yes": 0.9876141904435224, "No": 0.012385555059103171}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992559242017923, "res": {"Yes": 0.9992559242017923, "No": 0.0007439778463394347}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8990576587738934, "res": {"Yes": 0.8990576587738934, "No": 0.10094191768340395}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9788468703862232, "res": {"Yes": 0.9788468703862232, "No": 0.021152975453589143}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884417087582107, "res": {"Yes": 0.9884417087582107, "No": 0.011558201585836292}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43198082858423736, "res": {"No": 0.5680187217367749, "Yes": 0.43198082858423736}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933418239367449, "res": {"Yes": 0.9933418239367449, "No": 0.00665809414364909}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9955702275137179, "res": {"Yes": 0.9955702275137179, "No": 0.004429741716047868}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9635122568845149, "res": {"Yes": 0.9635122568845149, "No": 0.03648744683052321}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9833844928918928, "res": {"Yes": 0.9833844928918928, "No": 0.016615537955480944}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9857664761843957, "res": {"Yes": 0.9857664761843957, "No": 0.014233361051576661}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998100416496833, "res": {"Yes": 0.998100416496833, "No": 0.0018995865913204928}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7289822905987744, "res": {"Yes": 0.7289822905987744, "No": 0.27101706218285593}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8241015058074436, "res": {"Yes": 0.8241015058074436, "No": 0.17589828097967386}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9556527377037212, "res": {"Yes": 0.9556527377037212, "No": 0.04434712489274472}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950072954896723, "res": {"Yes": 0.9950072954896723, "No": 0.004992662222311965}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9839845305886037, "res": {"Yes": 0.9839845305886037, "No": 0.016015432345510824}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9010096239691915, "res": {"Yes": 0.9010096239691915, "No": 0.09899011324961597}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8934468690506496, "res": {"Yes": 0.8934468690506496, "No": 0.10655244220060484}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99125724411426, "res": {"Yes": 0.99125724411426, "No": 0.008742532251375787}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973959902307726, "res": {"Yes": 0.9973959902307726, "No": 0.0026039058791726857}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9818848079711151, "res": {"Yes": 0.9818848079711151, "No": 0.018115008235716646}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8916277180194687, "res": {"Yes": 0.8916277180194687, "No": 0.10837199197239587}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9737187254022477, "res": {"Yes": 0.9737187254022477, "No": 0.026281155960833812}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9798306092122638, "res": {"Yes": 0.9798306092122638, "No": 0.02016932687814516}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9488944536980094, "res": {"Yes": 0.9488944536980094, "No": 0.0511054772821008}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9162275629443168, "res": {"Yes": 0.9162275629443168, "No": 0.08377228953377869}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8640706350410366, "res": {"Yes": 0.8640706350410366, "No": 0.1359290256076353}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9353434789376514, "res": {"Yes": 0.9353434789376514, "No": 0.06465624379534866}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9700132323304214, "res": {"Yes": 0.9700132323304214, "No": 0.029986380012269963}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9720915040339129, "res": {"Yes": 0.9720915040339129, "No": 0.027908197949498854}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9781569903493195, "res": {"Yes": 0.9781569903493195, "No": 0.021842944844342085}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9991300110899877, "res": {"Yes": 0.9991300110899877, "No": 0.0008698845989489672}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9121201537982101, "res": {"Yes": 0.9121201537982101, "No": 0.08787980064143426}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962069002459882, "res": {"Yes": 0.9962069002459882, "No": 0.003793074784569733}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867113413124122, "res": {"Yes": 0.9867113413124122, "No": 0.013288534276986832}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910710271704957, "res": {"Yes": 0.9910710271704957, "No": 0.008928945287247407}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.879088283543169, "res": {"Yes": 0.879088283543169, "No": 0.12091158535764554}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9542413176592233, "res": {"Yes": 0.9542413176592233, "No": 0.045758605801468205}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9736333982742992, "res": {"Yes": 0.9736333982742992, "No": 0.026366408086782006}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8972410391680999, "res": {"Yes": 0.8972410391680999, "No": 0.10275894227009282}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904288468618037, "res": {"Yes": 0.9904288468618037, "No": 0.009571107059587816}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.08792839402520265, "res": {"No": 0.9120711651403518, "Yes": 0.08792839402520265}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8501504543871391, "res": {"Yes": 0.8501504543871391, "No": 0.14984938340140538}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765194370992103, "res": {"Yes": 0.9765194370992103, "No": 0.02348048331781652}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867756444255893, "res": {"Yes": 0.9867756444255893, "No": 0.013224267371663103}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968962766387662, "res": {"Yes": 0.9968962766387662, "No": 0.003103696991946826}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6750750966510877, "res": {"Yes": 0.6750750966510877, "No": 0.3249243152259906}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9274020189749755, "res": {"Yes": 0.9274020189749755, "No": 0.07259790603938872}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8245557026135867, "res": {"Yes": 0.8245557026135867, "No": 0.17544424129089828}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.964817929053879, "res": {"Yes": 0.964817929053879, "No": 0.035181985603719344}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9662402110486239, "res": {"Yes": 0.9662402110486239, "No": 0.03375968972185374}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9918804461044656, "res": {"Yes": 0.9918804461044656, "No": 0.00811936199451146}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913761490822659, "res": {"Yes": 0.9913761490822659, "No": 0.008623671128439665}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984487292662414, "res": {"Yes": 0.9984487292662414, "No": 0.0015510510206549297}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9747104192467956, "res": {"Yes": 0.9747104192467956, "No": 0.025289373278579302}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9921108440359669, "res": {"Yes": 0.9921108440359669, "No": 0.007888999170623042}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9878993824952281, "res": {"Yes": 0.9878993824952281, "No": 0.012100518210287695}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9099024800445555, "res": {"Yes": 0.9099024800445555, "No": 0.0900973614031795}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9550524151821947, "res": {"Yes": 0.9550524151821947, "No": 0.04494752413518796}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9744554206018351, "res": {"Yes": 0.9744554206018351, "No": 0.02554450382383188}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958198358106605, "res": {"Yes": 0.9958198358106605, "No": 0.0041800950227021206}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9462604355206329, "res": {"Yes": 0.9462604355206329, "No": 0.05373926246627316}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.969372143305846, "res": {"Yes": 0.969372143305846, "No": 0.03062774137002901}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9812008797421585, "res": {"Yes": 0.9812008797421585, "No": 0.01879896996016783}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9385130010888573, "res": {"Yes": 0.9385130010888573, "No": 0.061486723839619804}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9023260623930628, "res": {"Yes": 0.9023260623930628, "No": 0.09767376325098477}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9665112891085009, "res": {"Yes": 0.9665112891085009, "No": 0.033488599885243116}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9457309730907026, "res": {"Yes": 0.9457309730907026, "No": 0.054268973813215486}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9690771869195859, "res": {"Yes": 0.9690771869195859, "No": 0.030922670220021476}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9664101847515207, "res": {"Yes": 0.9664101847515207, "No": 0.03358975396617412}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9684847812414044, "res": {"Yes": 0.9684847812414044, "No": 0.031515093159015455}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6133368765991936, "res": {"Yes": 0.6133368765991936, "No": 0.38666295992370403}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9753299880141832, "res": {"Yes": 0.9753299880141832, "No": 0.024669885246303196}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9427926131156662, "res": {"Yes": 0.9427926131156662, "No": 0.05720722942693323}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9480574246844725, "res": {"Yes": 0.9480574246844725, "No": 0.05194229957741087}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9389918399131244, "res": {"Yes": 0.9389918399131244, "No": 0.06100793751972161}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7738014599420048, "res": {"Yes": 0.7738014599420048, "No": 0.22619810338883775}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.945156305046704, "res": {"Yes": 0.945156305046704, "No": 0.05484328634642021}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8730142589498794, "res": {"Yes": 0.8730142589498794, "No": 0.1269854456845229}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9733333512309083, "res": {"Yes": 0.9733333512309083, "No": 0.026666451546904092}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9034121605339044, "res": {"Yes": 0.9034121605339044, "No": 0.09658754305122529}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9878341146331076, "res": {"Yes": 0.9878341146331076, "No": 0.012165628980172875}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9717339810760319, "res": {"Yes": 0.9717339810760319, "No": 0.02826589852626475}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949620920415622, "res": {"Yes": 0.9949620920415622, "No": 0.00503792997758518}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9576186683434332, "res": {"Yes": 0.9576186683434332, "No": 0.04238104790685258}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945529876635855, "res": {"Yes": 0.9945529876635855, "No": 0.005446911108847856}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7411971275375411, "res": {"Yes": 0.7411971275375411, "No": 0.25880277340763574}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8395970958585621, "res": {"Yes": 0.8395970958585621, "No": 0.16040257982212183}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6988160241724204, "res": {"Yes": 0.6988160241724204, "No": 0.30118367029926446}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.915375257770463, "res": {"Yes": 0.915375257770463, "No": 0.08462475862866241}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198332862109083, "res": {"Yes": 0.9198332862109083, "No": 0.08016652862815739}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7439976520505945, "res": {"Yes": 0.7439976520505945, "No": 0.2560020312407175}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9738473524036694, "res": {"Yes": 0.9738473524036694, "No": 0.026152504988494606}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824291822219089, "res": {"Yes": 0.9824291822219089, "No": 0.01757082540146682}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9720579314893889, "res": {"Yes": 0.9720579314893889, "No": 0.027941747007399185}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9674279665773494, "res": {"Yes": 0.9674279665773494, "No": 0.032571825915413063}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9589602254355009, "res": {"Yes": 0.9589602254355009, "No": 0.04103953535265968}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913624429060643, "res": {"Yes": 0.9913624429060643, "No": 0.008637478318937826}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987241492904542, "res": {"Yes": 0.9987241492904542, "No": 0.0012758176650486557}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.880948236644232, "res": {"Yes": 0.880948236644232, "No": 0.11905158409570403}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9577822320875271, "res": {"Yes": 0.9577822320875271, "No": 0.04221755707632471}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.94248712521746, "res": {"Yes": 0.94248712521746, "No": 0.057512267851293916}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9845710873088207, "res": {"Yes": 0.9845710873088207, "No": 0.015428543671257676}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958844996786164, "res": {"Yes": 0.9958844996786164, "No": 0.0041153680820318}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937993792865444, "res": {"Yes": 0.9937993792865444, "No": 0.006200554843798029}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971760562517435, "res": {"Yes": 0.9971760562517435, "No": 0.002823830450297612}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8056808118376153, "res": {"Yes": 0.8056808118376153, "No": 0.19431873581419912}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7426854092870473, "res": {"Yes": 0.7426854092870473, "No": 0.2573141929808157}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9650981046120982, "res": {"Yes": 0.9650981046120982, "No": 0.03490169093806652}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9326277682127347, "res": {"Yes": 0.9326277682127347, "No": 0.06737197154517216}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9538220722537482, "res": {"Yes": 0.9538220722537482, "No": 0.046177666817018743}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8607923299400022, "res": {"Yes": 0.8607923299400022, "No": 0.13920745172603674}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914298092642021, "res": {"Yes": 0.9914298092642021, "No": 0.008570092953915994}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915759508394398, "res": {"Yes": 0.9915759508394398, "No": 0.00842399582296453}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7487843905421829, "res": {"Yes": 0.7487843905421829, "No": 0.25121541018294835}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6286138636861744, "res": {"Yes": 0.6286138636861744, "No": 0.3713858462392968}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7131568833660803, "res": {"Yes": 0.7131568833660803, "No": 0.28684295732639803}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8928528029632289, "res": {"Yes": 0.8928528029632289, "No": 0.10714699524138367}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8645850916133794, "res": {"Yes": 0.8645850916133794, "No": 0.13541459941246575}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9434943467286643, "res": {"Yes": 0.9434943467286643, "No": 0.056505403965296144}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.954402641972239, "res": {"Yes": 0.954402641972239, "No": 0.045597134698283116}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8382275633954831, "res": {"Yes": 0.8382275633954831, "No": 0.16177233041725478}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6586215952157017, "res": {"Yes": 0.6586215952157017, "No": 0.34137831107645233}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.962830899706727, "res": {"Yes": 0.962830899706727, "No": 0.037169018526952646}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9644604048435255, "res": {"Yes": 0.9644604048435255, "No": 0.03553955775207677}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9639539203315645, "res": {"Yes": 0.9639539203315645, "No": 0.03604601364022436}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7281188417108574, "res": {"Yes": 0.7281188417108574, "No": 0.27188100228847334}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9444058210830638, "res": {"Yes": 0.9444058210830638, "No": 0.055593966488934464}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926607534744585, "res": {"Yes": 0.9926607534744585, "No": 0.00733921052531108}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9127922781410646, "res": {"Yes": 0.9127922781410646, "No": 0.08720759092184287}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6392003928077428, "res": {"Yes": 0.6392003928077428, "No": 0.3607994915587138}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9851024796185288, "res": {"Yes": 0.9851024796185288, "No": 0.014897349019714491}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9314156834493394, "res": {"Yes": 0.9314156834493394, "No": 0.06858400052162812}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898006933198578, "res": {"Yes": 0.9898006933198578, "No": 0.010199113237206216}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789098073689997, "res": {"Yes": 0.9789098073689997, "No": 0.021090033416951235}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9672939919187425, "res": {"Yes": 0.9672939919187425, "No": 0.032705755741774506}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9421754943714885, "res": {"Yes": 0.9421754943714885, "No": 0.05782427550728612}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6153331380188415, "res": {"Yes": 0.6153331380188415, "No": 0.38466630145746034}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8480326767748846, "res": {"Yes": 0.8480326767748846, "No": 0.15196675487283393}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8481737248178839, "res": {"Yes": 0.8481737248178839, "No": 0.15182573440943822}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7703219766822558, "res": {"Yes": 0.7703219766822558, "No": 0.22967746048117094}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8501470027833011, "res": {"Yes": 0.8501470027833011, "No": 0.14985278502101684}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9837990856772639, "res": {"Yes": 0.9837990856772639, "No": 0.016200919161324403}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875545442980888, "res": {"Yes": 0.9875545442980888, "No": 0.012445235749891247}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855584787321869, "res": {"Yes": 0.9855584787321869, "No": 0.014441441841251575}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9147861196926003, "res": {"Yes": 0.9147861196926003, "No": 0.08521373919417251}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9490077081625308, "res": {"Yes": 0.9490077081625308, "No": 0.050992199955686573}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.989032797128267, "res": {"Yes": 0.989032797128267, "No": 0.01096706048540978}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8391647307596765, "res": {"Yes": 0.8391647307596765, "No": 0.1608351422152911}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.969173692430789, "res": {"Yes": 0.969173692430789, "No": 0.030826218534229677}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8609806919078409, "res": {"Yes": 0.8609806919078409, "No": 0.1390192453055365}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9657100645867936, "res": {"Yes": 0.9657100645867936, "No": 0.03428990441700881}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9500088439593388, "res": {"Yes": 0.9500088439593388, "No": 0.04999111446732002}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9374049865064991, "res": {"Yes": 0.9374049865064991, "No": 0.06259486080580742}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925118195750141, "res": {"Yes": 0.9925118195750141, "No": 0.007488063946346707}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.844332191498716, "res": {"Yes": 0.844332191498716, "No": 0.15566768005706197}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.36015474235920525, "res": {"No": 0.6398447437168857, "Yes": 0.36015474235920525}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8048512992944676, "res": {"Yes": 0.8048512992944676, "No": 0.19514773341529118}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33838854577688, "res": {"No": 0.6616108273529849, "Yes": 0.33838854577688}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7662013153486129, "res": {"Yes": 0.7662013153486129, "No": 0.23379855744539704}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7548654179448456, "res": {"Yes": 0.7548654179448456, "No": 0.2451343625971853}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9941125453865896, "res": {"Yes": 0.9941125453865896, "No": 0.005887422401139542}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9828707281106339, "res": {"Yes": 0.9828707281106339, "No": 0.017129245725254583}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902754492371946, "res": {"Yes": 0.9902754492371946, "No": 0.009724514632282497}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974326373272269, "res": {"Yes": 0.9974326373272269, "No": 0.0025673886144109924}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9797808252837812, "res": {"Yes": 0.9797808252837812, "No": 0.020218990347390504}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9501449624271034, "res": {"Yes": 0.9501449624271034, "No": 0.04985487469965481}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9293643045981652, "res": {"Yes": 0.9293643045981652, "No": 0.0706354101516892}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9131435197116466, "res": {"Yes": 0.9131435197116466, "No": 0.08685627829163725}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938855709708356, "res": {"Yes": 0.9938855709708356, "No": 0.0061143231671500105}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992065329615893, "res": {"Yes": 0.9992065329615893, "No": 0.0007934238419362225}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9400157411068, "res": {"Yes": 0.9400157411068, "No": 0.05998418109128439}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.882343061485317, "res": {"Yes": 0.882343061485317, "No": 0.11765684317111179}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.946372740587905, "res": {"Yes": 0.946372740587905, "No": 0.05362706469523031}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.843597292366819, "res": {"Yes": 0.843597292366819, "No": 0.15640260421789406}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8131906785271903, "res": {"Yes": 0.8131906785271903, "No": 0.18680906995559118}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6208909353057196, "res": {"Yes": 0.6208909353057196, "No": 0.37910893035911175}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9553332198990945, "res": {"Yes": 0.9553332198990945, "No": 0.044666700957250144}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9142765748476801, "res": {"Yes": 0.9142765748476801, "No": 0.08572330957394415}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8573331981256502, "res": {"Yes": 0.8573331981256502, "No": 0.14266664841510002}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.08877553266517933, "res": {"No": 0.9112243629426515, "Yes": 0.08877553266517933}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.987150595658236, "res": {"Yes": 0.987150595658236, "No": 0.012849244026423743}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9684191531254213, "res": {"Yes": 0.9684191531254213, "No": 0.031580589608987596}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7946115659652365, "res": {"Yes": 0.7946115659652365, "No": 0.20538750825743773}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975083067339104, "res": {"Yes": 0.9975083067339104, "No": 0.0024917026674573716}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9940837950734103, "res": {"Yes": 0.9940837950734103, "No": 0.00591593335150971}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9407909080001117, "res": {"Yes": 0.9407909080001117, "No": 0.05920896575471367}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8930202785685778, "res": {"Yes": 0.8930202785685778, "No": 0.10697959111028038}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9543639799043182, "res": {"Yes": 0.9543639799043182, "No": 0.045635831158430715}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9779071569476249, "res": {"Yes": 0.9779071569476249, "No": 0.02209284010720234}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9309808138751176, "res": {"Yes": 0.9309808138751176, "No": 0.06901905176228734}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9183156060558383, "res": {"Yes": 0.9183156060558383, "No": 0.08168411719443235}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6968234779871955, "res": {"Yes": 0.6968234779871955, "No": 0.3031761164255848}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.904924373470451, "res": {"Yes": 0.904924373470451, "No": 0.09507464110850727}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9664734335356975, "res": {"Yes": 0.9664734335356975, "No": 0.033526403044385956}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.964040439078385, "res": {"Yes": 0.964040439078385, "No": 0.03595928330200082}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8482886686271759, "res": {"Yes": 0.8482886686271759, "No": 0.1517108154771419}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9403894086318194, "res": {"Yes": 0.9403894086318194, "No": 0.0596103537119049}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9610371914310324, "res": {"Yes": 0.9610371914310324, "No": 0.038962485735390365}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9387770466255523, "res": {"Yes": 0.9387770466255523, "No": 0.06122265366687117}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7964146194834081, "res": {"Yes": 0.7964146194834081, "No": 0.2035849329981835}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9133827352875805, "res": {"Yes": 0.9133827352875805, "No": 0.08661695477113832}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8935402337657256, "res": {"Yes": 0.8935402337657256, "No": 0.1064591644240261}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984048819552824, "res": {"Yes": 0.9984048819552824, "No": 0.0015950239933592621}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9223696166695861, "res": {"Yes": 0.9223696166695861, "No": 0.07763008761051529}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9810848605790401, "res": {"Yes": 0.9810848605790401, "No": 0.01891499850376955}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9575435719592861, "res": {"Yes": 0.9575435719592861, "No": 0.04245620520972694}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9486015011973566, "res": {"Yes": 0.9486015011973566, "No": 0.05139836070132192}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9171392173342379, "res": {"Yes": 0.9171392173342379, "No": 0.08286036809252013}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9241121409870068, "res": {"Yes": 0.9241121409870068, "No": 0.0758875833644061}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9919110540235186, "res": {"Yes": 0.9919110540235186, "No": 0.008088880000140899}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.94448365277379, "res": {"Yes": 0.94448365277379, "No": 0.05551609501507931}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982081408000202, "res": {"Yes": 0.9982081408000202, "No": 0.0017918660788618802}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9831965103642356, "res": {"Yes": 0.9831965103642356, "No": 0.016803311481548604}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962079580190365, "res": {"Yes": 0.9962079580190365, "No": 0.0037920803707345704}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968588393008321, "res": {"Yes": 0.9968588393008321, "No": 0.0031409310822709804}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9851804584914712, "res": {"Yes": 0.9851804584914712, "No": 0.014819511824112909}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950136677357986, "res": {"Yes": 0.9950136677357986, "No": 0.0049863190836991166}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974599110991688, "res": {"Yes": 0.9974599110991688, "No": 0.0025400259029293754}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989352465526438, "res": {"Yes": 0.9989352465526438, "No": 0.0010647396178447405}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992312859721322, "res": {"Yes": 0.9992312859721322, "No": 0.000768662166754036}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9733886760975491, "res": {"Yes": 0.9733886760975491, "No": 0.026611154739195658}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9758144581457806, "res": {"Yes": 0.9758144581457806, "No": 0.024185267794906005}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9756614614582592, "res": {"Yes": 0.9756614614582592, "No": 0.024338429911872503}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965003845786093, "res": {"Yes": 0.9965003845786093, "No": 0.0034995776121930938}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9909793533763599, "res": {"Yes": 0.9909793533763599, "No": 0.00902054973981835}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9914888734715279, "res": {"Yes": 0.9914888734715279, "No": 0.008511041146420985}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.980947187733484, "res": {"Yes": 0.980947187733484, "No": 0.019052826799360986}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903438241757242, "res": {"Yes": 0.9903438241757242, "No": 0.009656094582788}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986736197209237, "res": {"Yes": 0.9986736197209237, "No": 0.001326325423377422}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968361012100366, "res": {"Yes": 0.9968361012100366, "No": 0.0031638868228230106}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9994978484902921, "res": {"Yes": 0.9994978484902921, "No": 0.0005021357783718243}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999251876885805, "res": {"Yes": 0.9999251876885805, "No": 7.476751939330449e-05}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987610157755247, "res": {"Yes": 0.9987610157755247, "No": 0.0012388955273840473}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9687396715473091, "res": {"Yes": 0.9687396715473091, "No": 0.03126017987020758}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914973140522351, "res": {"Yes": 0.9914973140522351, "No": 0.008502581123444564}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9671641781689755, "res": {"Yes": 0.9671641781689755, "No": 0.03283561925224199}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7826715401141413, "res": {"Yes": 0.7826715401141413, "No": 0.2173284228994018}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940744887032998, "res": {"Yes": 0.9940744887032998, "No": 0.0059254301229801284}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890721040507531, "res": {"Yes": 0.9890721040507531, "No": 0.010927781181598461}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9380021386198438, "res": {"Yes": 0.9380021386198438, "No": 0.06199765606840451}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6519235881581096, "res": {"Yes": 0.6519235881581096, "No": 0.3480760808395693}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9677501737347339, "res": {"Yes": 0.9677501737347339, "No": 0.032249781853364266}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9192083761999895, "res": {"Yes": 0.9192083761999895, "No": 0.08079154176991021}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7930775808811042, "res": {"Yes": 0.7930775808811042, "No": 0.2069221796750647}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9214212951640477, "res": {"Yes": 0.9214212951640477, "No": 0.07857854352155436}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9837067909604115, "res": {"Yes": 0.9837067909604115, "No": 0.016293164743613548}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7177278649176616, "res": {"Yes": 0.7177278649176616, "No": 0.28227184020749774}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921654076643225, "res": {"Yes": 0.9921654076643225, "No": 0.007834576946291368}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9680193095715723, "res": {"Yes": 0.9680193095715723, "No": 0.03198050118093814}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984381550512785, "res": {"Yes": 0.9984381550512785, "No": 0.0015618644673363066}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9771843430347361, "res": {"Yes": 0.9771843430347361, "No": 0.02281556044829579}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946149056860826, "res": {"Yes": 0.9946149056860826, "No": 0.0053851215320642205}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998949402164745, "res": {"Yes": 0.998949402164745, "No": 0.001050557083282892}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961114233711977, "res": {"Yes": 0.9961114233711977, "No": 0.0038885816586935475}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925073528848722, "res": {"Yes": 0.9925073528848722, "No": 0.007492575365739157}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9931624775612825, "res": {"Yes": 0.9931624775612825, "No": 0.006837435804614091}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9532978860430108, "res": {"Yes": 0.9532978860430108, "No": 0.046701836014240806}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973617144088751, "res": {"Yes": 0.9973617144088751, "No": 0.002638089494842376}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991093025067124, "res": {"Yes": 0.9991093025067124, "No": 0.0008906268654224811}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992660453765584, "res": {"Yes": 0.9992660453765584, "No": 0.0007338884620181871}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6547765019317153, "res": {"Yes": 0.6547765019317153, "No": 0.34522334313481257}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9092470132232545, "res": {"Yes": 0.9092470132232545, "No": 0.09075277752222777}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9231668459244993, "res": {"Yes": 0.9231668459244993, "No": 0.07683301103458325}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9446319228431111, "res": {"Yes": 0.9446319228431111, "No": 0.05536787649863423}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9289483213238007, "res": {"Yes": 0.9289483213238007, "No": 0.07105150090724574}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6859566676104295, "res": {"Yes": 0.6859566676104295, "No": 0.31404305017607254}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5078210790690196, "res": {"Yes": 0.5078210790690196, "No": 0.492178314650315}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9819829660101125, "res": {"Yes": 0.9819829660101125, "No": 0.018017027428175136}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9392676106593245, "res": {"Yes": 0.9392676106593245, "No": 0.06073231805202089}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6124853103211488, "res": {"Yes": 0.6124853103211488, "No": 0.3875146594754186}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7491772909025991, "res": {"Yes": 0.7491772909025991, "No": 0.25082236488914783}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9892923286542167, "res": {"Yes": 0.9892923286542167, "No": 0.010707402010491746}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9372815983850329, "res": {"Yes": 0.9372815983850329, "No": 0.06271806843813921}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9086606925607766, "res": {"Yes": 0.9086606925607766, "No": 0.0913390109050728}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7851087152691237, "res": {"Yes": 0.7851087152691237, "No": 0.21489073270903203}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.28909717549518654, "res": {"No": 0.7109026488515336, "Yes": 0.28909717549518654}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846022888609635, "res": {"Yes": 0.9846022888609635, "No": 0.015397691244344026}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9248748665742329, "res": {"Yes": 0.9248748665742329, "No": 0.0751249538827874}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970085971883046, "res": {"Yes": 0.9970085971883046, "No": 0.0029914361176010254}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9234571075082291, "res": {"Yes": 0.9234571075082291, "No": 0.07654264825209298}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2703168471616385, "res": {"No": 0.7296829020235664, "Yes": 0.2703168471616385}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.08994830163280812, "res": {"No": 0.9100515342779952, "Yes": 0.08994830163280812}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3693248388910395, "res": {"No": 0.6306750848857487, "Yes": 0.3693248388910395}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5308815246789623, "res": {"Yes": 0.5308815246789623, "No": 0.4691183016591526}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.46424200510321395, "res": {"No": 0.5357577923413607, "Yes": 0.46424200510321395}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.995121794283013, "res": {"Yes": 0.995121794283013, "No": 0.004878146240673135}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928011440951783, "res": {"Yes": 0.9928011440951783, "No": 0.007198849913176846}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9788863862531365, "res": {"Yes": 0.9788863862531365, "No": 0.021113357118356046}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954305836945507, "res": {"Yes": 0.9954305836945507, "No": 0.004569228073728445}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9689228290946368, "res": {"Yes": 0.9689228290946368, "No": 0.031076906248230673}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7685810548978723, "res": {"Yes": 0.7685810548978723, "No": 0.23141871273768588}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6892841042983884, "res": {"Yes": 0.6892841042983884, "No": 0.3107154926908297}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8584409532370765, "res": {"Yes": 0.8584409532370765, "No": 0.14155882838419845}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273947444620694, "res": {"Yes": 0.9273947444620694, "No": 0.0726050935878644}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8114476128963298, "res": {"Yes": 0.8114476128963298, "No": 0.1885519902081273}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9918480741346788, "res": {"Yes": 0.9918480741346788, "No": 0.008151836823552241}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999321010092408, "res": {"Yes": 0.9999321010092408, "No": 6.775539299298584e-05}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978388709094385, "res": {"Yes": 0.9978388709094385, "No": 0.0021610814743852577}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995320213361899, "res": {"Yes": 0.9995320213361899, "No": 0.0004679185573250855}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914235935144977, "res": {"Yes": 0.9914235935144977, "No": 0.008576380565473721}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.34775158163269965, "res": {"No": 0.6522481638962008, "Yes": 0.34775158163269965}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5356968801432955, "res": {"Yes": 0.5356968801432955, "No": 0.46430254620787365}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6289698856360963, "res": {"Yes": 0.6289698856360963, "No": 0.3710295982156179}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7687107331463452, "res": {"Yes": 0.7687107331463452, "No": 0.231289177667151}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4693526378478772, "res": {"No": 0.5306465023776121, "Yes": 0.4693526378478772}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9212336898540338, "res": {"Yes": 0.9212336898540338, "No": 0.07876600372644983}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7204087642094252, "res": {"Yes": 0.7204087642094252, "No": 0.27959090437479334}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9347626099793183, "res": {"Yes": 0.9347626099793183, "No": 0.06523725353401293}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9864709268128713, "res": {"Yes": 0.9864709268128713, "No": 0.013528992550405851}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8925423423899277, "res": {"Yes": 0.8925423423899277, "No": 0.10745718390590975}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8163725025162066, "res": {"Yes": 0.8163725025162066, "No": 0.1836273218588059}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9045666497662544, "res": {"Yes": 0.9045666497662544, "No": 0.09543304279139914}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.808739820462897, "res": {"Yes": 0.808739820462897, "No": 0.19125971694702668}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8906739104825216, "res": {"Yes": 0.8906739104825216, "No": 0.10932591633135089}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7997184666011522, "res": {"Yes": 0.7997184666011522, "No": 0.20028134018218824}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6515808316788131, "res": {"Yes": 0.6515808316788131, "No": 0.3484190350068948}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9589106772407412, "res": {"Yes": 0.9589106772407412, "No": 0.04108894794867575}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983029096085379, "res": {"Yes": 0.983029096085379, "No": 0.01697076026854974}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939457446125713, "res": {"Yes": 0.9939457446125713, "No": 0.006054068824579532}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.953834003688675, "res": {"Yes": 0.953834003688675, "No": 0.04616574528258291}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.29236232701065823, "res": {"No": 0.7076373740814274, "Yes": 0.29236232701065823}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.931316735103359, "res": {"Yes": 0.931316735103359, "No": 0.06868301120001205}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3246089993032217, "res": {"No": 0.67539063296307, "Yes": 0.3246089993032217}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9800181138185703, "res": {"Yes": 0.9800181138185703, "No": 0.01998178840515445}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8851190404824651, "res": {"Yes": 0.8851190404824651, "No": 0.11488067477059545}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9724083729253078, "res": {"Yes": 0.9724083729253078, "No": 0.027591417907307035}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9853123731338873, "res": {"Yes": 0.9853123731338873, "No": 0.01468753652900002}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971137059789217, "res": {"Yes": 0.9971137059789217, "No": 0.0028862188208627466}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962539819970038, "res": {"Yes": 0.9962539819970038, "No": 0.0037459163713345713}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795882614296126, "res": {"Yes": 0.9795882614296126, "No": 0.020411757420751207}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.25459577185696713, "res": {"No": 0.7454039446202615, "Yes": 0.25459577185696713}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.604029120174534, "res": {"Yes": 0.604029120174534, "No": 0.3959703120930508}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765720768216807, "res": {"Yes": 0.9765720768216807, "No": 0.023427884986481115}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6843057351585409, "res": {"Yes": 0.6843057351585409, "No": 0.3156938071058648}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8896974481051, "res": {"Yes": 0.8896974481051, "No": 0.11030216771661233}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9186608565930457, "res": {"Yes": 0.9186608565930457, "No": 0.08133883509778037}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940549312753832, "res": {"Yes": 0.9940549312753832, "No": 0.005944945002104584}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992200146959901, "res": {"Yes": 0.992200146959901, "No": 0.00779978117630494}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894460113245588, "res": {"Yes": 0.9894460113245588, "No": 0.010553943472913252}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979802604245501, "res": {"Yes": 0.9979802604245501, "No": 0.002019590118080001}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4814728045617523, "res": {"No": 0.5185270411606696, "Yes": 0.4814728045617523}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9291314592406195, "res": {"Yes": 0.9291314592406195, "No": 0.07086843139211697}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9188219487101739, "res": {"Yes": 0.9188219487101739, "No": 0.08117794353949276}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9913479046824409, "res": {"Yes": 0.9913479046824409, "No": 0.008652023973784955}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9104810161039886, "res": {"Yes": 0.9104810161039886, "No": 0.08951894501108895}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6761862453584813, "res": {"Yes": 0.6761862453584813, "No": 0.32381328829241834}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9790954703238872, "res": {"Yes": 0.9790954703238872, "No": 0.020904350160899733}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9148643372495868, "res": {"Yes": 0.9148643372495868, "No": 0.08513525941606506}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9090972362077676, "res": {"Yes": 0.9090972362077676, "No": 0.09090238868328673}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8220443348827031, "res": {"Yes": 0.8220443348827031, "No": 0.17795527932557575}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9096090565191552, "res": {"Yes": 0.9096090565191552, "No": 0.09039047338920017}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944949848367711, "res": {"Yes": 0.9944949848367711, "No": 0.0055048266437148635}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992455685566148, "res": {"Yes": 0.9992455685566148, "No": 0.0007543408020237703}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969693694603741, "res": {"Yes": 0.9969693694603741, "No": 0.0030306353347660494}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922773669488832, "res": {"Yes": 0.9922773669488832, "No": 0.0077225821151037655}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.980737317898292, "res": {"Yes": 0.980737317898292, "No": 0.019262559296536596}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996061047471125, "res": {"Yes": 0.9996061047471125, "No": 0.00039377377054835075}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926540495626973, "res": {"Yes": 0.9926540495626973, "No": 0.007345826133878497}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961954254032742, "res": {"Yes": 0.9961954254032742, "No": 0.003804581045645039}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963071116459921, "res": {"Yes": 0.9963071116459921, "No": 0.003692810343881663}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7015764111481486, "res": {"Yes": 0.7015764111481486, "No": 0.2984234284983966}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9856769460386021, "res": {"Yes": 0.9856769460386021, "No": 0.014323019224120196}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9693331675742812, "res": {"Yes": 0.9693331675742812, "No": 0.030666721578486163}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8901379573589577, "res": {"Yes": 0.8901379573589577, "No": 0.109861983108192}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.986417801851643, "res": {"Yes": 0.986417801851643, "No": 0.013582143865252007}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.42315249357360435, "res": {"No": 0.5768472955078983, "Yes": 0.42315249357360435}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9488277943063534, "res": {"Yes": 0.9488277943063534, "No": 0.05117192690224969}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9480723178355349, "res": {"Yes": 0.9480723178355349, "No": 0.05192746697484805}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.935286536959985, "res": {"Yes": 0.935286536959985, "No": 0.06471324398797752}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.3398823332321378, "res": {"No": 0.6601174930715054, "Yes": 0.3398823332321378}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9587936301584845, "res": {"Yes": 0.9587936301584845, "No": 0.04120620199897713}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9185760497375097, "res": {"Yes": 0.9185760497375097, "No": 0.08142382161125741}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954818529747892, "res": {"Yes": 0.9954818529747892, "No": 0.004518088377174687}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9538665423830359, "res": {"Yes": 0.9538665423830359, "No": 0.04613334368873055}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9841516775078787, "res": {"Yes": 0.9841516775078787, "No": 0.01584826893627901}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.828492313308366, "res": {"Yes": 0.828492313308366, "No": 0.17150729102748677}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9296978947156297, "res": {"Yes": 0.9296978947156297, "No": 0.07030184048972796}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8690993356069578, "res": {"Yes": 0.8690993356069578, "No": 0.13090040658470786}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9166826094624654, "res": {"Yes": 0.9166826094624654, "No": 0.08331725570014129}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9630807289617965, "res": {"Yes": 0.9630807289617965, "No": 0.036919035268913755}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8740830281109456, "res": {"Yes": 0.8740830281109456, "No": 0.12591641061505768}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9649135954941301, "res": {"Yes": 0.9649135954941301, "No": 0.0350862908106441}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991107317036185, "res": {"Yes": 0.9991107317036185, "No": 0.0008891685903421305}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9791211777920422, "res": {"Yes": 0.9791211777920422, "No": 0.0208787580103452}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9829633188941793, "res": {"Yes": 0.9829633188941793, "No": 0.017036690430619474}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9448781174971791, "res": {"Yes": 0.9448781174971791, "No": 0.05512158484827746}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9662079391645183, "res": {"Yes": 0.9662079391645183, "No": 0.033791803699406896}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8058544225277419, "res": {"Yes": 0.8058544225277419, "No": 0.1941449954219883}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.953591879816888, "res": {"Yes": 0.953591879816888, "No": 0.04640789886864928}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9264853181086838, "res": {"Yes": 0.9264853181086838, "No": 0.07351442746391872}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9129857923580321, "res": {"Yes": 0.9129857923580321, "No": 0.08701404818257871}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9767981462166943, "res": {"Yes": 0.9767981462166943, "No": 0.023201686708500417}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998766124151556, "res": {"Yes": 0.998766124151556, "No": 0.0012338556964557547}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9833982092344572, "res": {"Yes": 0.9833982092344572, "No": 0.016601702975433213}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888107129928229, "res": {"Yes": 0.9888107129928229, "No": 0.011189135680388313}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.057738712686462426, "res": {"No": 0.942261100557064, "Yes": 0.057738712686462426}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9455447316075601, "res": {"Yes": 0.9455447316075601, "No": 0.054455065697518734}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9377015346003398, "res": {"Yes": 0.9377015346003398, "No": 0.062298246478336004}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9772965947383118, "res": {"Yes": 0.9772965947383118, "No": 0.022703390600225894}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9713843693968324, "res": {"Yes": 0.9713843693968324, "No": 0.028615532905571627}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.06275485082336704, "res": {"No": 0.9372448482940504, "Yes": 0.06275485082336704}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952209601251552, "res": {"Yes": 0.9952209601251552, "No": 0.004779090607765177}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9852386567904432, "res": {"Yes": 0.9852386567904432, "No": 0.014761251107813888}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981479043496437, "res": {"Yes": 0.981479043496437, "No": 0.01852095843043026}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8479848490806681, "res": {"Yes": 0.8479848490806681, "No": 0.15201499677641567}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9888410224833184, "res": {"Yes": 0.9888410224833184, "No": 0.01115878721974577}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9044302061786536, "res": {"Yes": 0.9044302061786536, "No": 0.09556947586727321}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9666356996825133, "res": {"Yes": 0.9666356996825133, "No": 0.033363993480767504}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9706489427790325, "res": {"Yes": 0.9706489427790325, "No": 0.0293507787453118}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9577518344774273, "res": {"Yes": 0.9577518344774273, "No": 0.0422479519884497}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9081699030397722, "res": {"Yes": 0.9081699030397722, "No": 0.09183000343046639}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.923847836513027, "res": {"Yes": 0.923847836513027, "No": 0.07615205605041576}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8924522215844584, "res": {"Yes": 0.8924522215844584, "No": 0.107547604164202}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5725154597432506, "res": {"Yes": 0.5725154597432506, "No": 0.4274844352671282}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8651995110784156, "res": {"Yes": 0.8651995110784156, "No": 0.1348003613324141}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4856710966735361, "res": {"No": 0.5143287893660635, "Yes": 0.4856710966735361}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933748828034061, "res": {"Yes": 0.9933748828034061, "No": 0.006625052384809869}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6948173317347979, "res": {"Yes": 0.6948173317347979, "No": 0.3051825978614675}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944451147494066, "res": {"Yes": 0.9944451147494066, "No": 0.00555483977328278}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.542371710431633, "res": {"Yes": 0.542371710431633, "No": 0.45762788649789066}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9038298367425635, "res": {"Yes": 0.9038298367425635, "No": 0.09616992501221673}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9763696348845704, "res": {"Yes": 0.9763696348845704, "No": 0.02363025019064434}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.824981761635753, "res": {"Yes": 0.824981761635753, "No": 0.17501736174643132}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8321437139205372, "res": {"Yes": 0.8321437139205372, "No": 0.1678559848917869}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9808345647858683, "res": {"Yes": 0.9808345647858683, "No": 0.01916536759109266}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9880812557046157, "res": {"Yes": 0.9880812557046157, "No": 0.01191851983465894}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986051433181516, "res": {"Yes": 0.9986051433181516, "No": 0.001394778859403655}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998794542548257, "res": {"Yes": 0.998794542548257, "No": 0.0012053762742432381}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843378365764259, "res": {"Yes": 0.9843378365764259, "No": 0.01566211719375587}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996939014577211, "res": {"Yes": 0.9996939014577211, "No": 0.0003060402281836997}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9399826700547526, "res": {"Yes": 0.9399826700547526, "No": 0.060017085437742}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7965861458401514, "res": {"Yes": 0.7965861458401514, "No": 0.20341374936247697}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9495510547815048, "res": {"Yes": 0.9495510547815048, "No": 0.05044872525471316}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49736684820745836, "res": {"No": 0.5026329987196265, "Yes": 0.49736684820745836}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973551940782447, "res": {"Yes": 0.9973551940782447, "No": 0.0026446117786555977}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5633702665269805, "res": {"Yes": 0.5633702665269805, "No": 0.43662939939241474}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.983057095118826, "res": {"Yes": 0.983057095118826, "No": 0.016942912062774113}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9668969692495789, "res": {"Yes": 0.9668969692495789, "No": 0.03310294591851729}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6404697868194645, "res": {"Yes": 0.6404697868194645, "No": 0.359530128017036}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8727275295923985, "res": {"Yes": 0.8727275295923985, "No": 0.1272725155136326}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9072334726765471, "res": {"Yes": 0.9072334726765471, "No": 0.09276604505029973}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9638235543800359, "res": {"Yes": 0.9638235543800359, "No": 0.036176262763395695}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993600818350187, "res": {"Yes": 0.9993600818350187, "No": 0.0006398763521766576}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910949172255393, "res": {"Yes": 0.9910949172255393, "No": 0.008905009459256782}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9423345649229607, "res": {"Yes": 0.9423345649229607, "No": 0.05766529266816141}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9632042481332135, "res": {"Yes": 0.9632042481332135, "No": 0.03679553767467694}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982206129895521, "res": {"Yes": 0.9982206129895521, "No": 0.0017793956407486688}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975982189028212, "res": {"Yes": 0.9975982189028212, "No": 0.0024017347265383668}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9546967750079864, "res": {"Yes": 0.9546967750079864, "No": 0.045303050898872155}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9595456546088666, "res": {"Yes": 0.9595456546088666, "No": 0.04045416799979264}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7632234755138776, "res": {"Yes": 0.7632234755138776, "No": 0.23677586562019493}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9797517575774491, "res": {"Yes": 0.9797517575774491, "No": 0.02024810236886841}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834054696902379, "res": {"Yes": 0.9834054696902379, "No": 0.01659436000642139}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7915944075666044, "res": {"Yes": 0.7915944075666044, "No": 0.20840534950970788}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.94118988886406, "res": {"Yes": 0.94118988886406, "No": 0.05880990416978719}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.15644288307416035, "res": {"No": 0.8435570031222505, "Yes": 0.15644288307416035}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6418696713679908, "res": {"Yes": 0.6418696713679908, "No": 0.3581302972498178}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7638623776558485, "res": {"Yes": 0.7638623776558485, "No": 0.2361375511235774}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9892572074214054, "res": {"Yes": 0.9892572074214054, "No": 0.01074265944475348}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4270960711827077, "res": {"No": 0.5729037797589495, "Yes": 0.4270960711827077}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.999239378269477, "res": {"Yes": 0.999239378269477, "No": 0.000760472961865133}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983736332700721, "res": {"Yes": 0.9983736332700721, "No": 0.0016263451841691538}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998704861276457, "res": {"Yes": 0.9998704861276457, "No": 0.0001294058458931788}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993280628209638, "res": {"Yes": 0.9993280628209638, "No": 0.0006719020412609469}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9633121947024256, "res": {"Yes": 0.9633121947024256, "No": 0.036687671749037844}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9982290502852965, "res": {"Yes": 0.9982290502852965, "No": 0.0017708787263937768}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9238762267932575, "res": {"Yes": 0.9238762267932575, "No": 0.07612361095614979}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984222288916007, "res": {"Yes": 0.9984222288916007, "No": 0.0015777570959464643}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769607145836086, "res": {"Yes": 0.9769607145836086, "No": 0.023039194608355504}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9809555650582383, "res": {"Yes": 0.9809555650582383, "No": 0.019044411119641558}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9150311779727249, "res": {"Yes": 0.9150311779727249, "No": 0.08496875315653753}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8937712884614744, "res": {"Yes": 0.8937712884614744, "No": 0.10622864283439601}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9742597471894221, "res": {"Yes": 0.9742597471894221, "No": 0.025740174435787442}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8085363752049362, "res": {"Yes": 0.8085363752049362, "No": 0.1914634213181325}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9710703045868334, "res": {"Yes": 0.9710703045868334, "No": 0.02892957068181467}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9481314718089374, "res": {"Yes": 0.9481314718089374, "No": 0.05186834488680557}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9626042856136371, "res": {"Yes": 0.9626042856136371, "No": 0.0373954776620441}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9805592821805571, "res": {"Yes": 0.9805592821805571, "No": 0.019440569210558156}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971541318819708, "res": {"Yes": 0.9971541318819708, "No": 0.0028458727277748726}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9849706137612569, "res": {"Yes": 0.9849706137612569, "No": 0.015029249649246028}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9386758847680743, "res": {"Yes": 0.9386758847680743, "No": 0.061324042652937505}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887520891323525, "res": {"Yes": 0.9887520891323525, "No": 0.011247827276068918}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9479157964331051, "res": {"Yes": 0.9479157964331051, "No": 0.05208412912241984}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9469757152502506, "res": {"Yes": 0.9469757152502506, "No": 0.053024134561189644}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6703806457068648, "res": {"Yes": 0.6703806457068648, "No": 0.3296192429217766}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8720580466757124, "res": {"Yes": 0.8720580466757124, "No": 0.12794126340863907}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988282003939685, "res": {"Yes": 0.9988282003939685, "No": 0.0011717537744020184}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991703464130128, "res": {"Yes": 0.9991703464130128, "No": 0.0008296142701129445}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986813465884786, "res": {"Yes": 0.9986813465884786, "No": 0.0013186121460564396}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9464753499899466, "res": {"Yes": 0.9464753499899466, "No": 0.05352435473359407}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9363216297510519, "res": {"Yes": 0.9363216297510519, "No": 0.06367813658782612}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9808779048994539, "res": {"Yes": 0.9808779048994539, "No": 0.01912210276800183}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9249734875394424, "res": {"Yes": 0.9249734875394424, "No": 0.07502641705975178}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929809550751411, "res": {"Yes": 0.9929809550751411, "No": 0.007019049984118387}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9908632342465201, "res": {"Yes": 0.9908632342465201, "No": 0.0091367521619225}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9325926330560402, "res": {"Yes": 0.9325926330560402, "No": 0.0674072230427128}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.43272519761243006, "res": {"No": 0.5672745371121864, "Yes": 0.43272519761243006}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8628642071911871, "res": {"Yes": 0.8628642071911871, "No": 0.13713564959405253}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8313399270052155, "res": {"Yes": 0.8313399270052155, "No": 0.16866002105478062}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9768844833594044, "res": {"Yes": 0.9768844833594044, "No": 0.023115474360309415}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.960484689534498, "res": {"Yes": 0.960484689534498, "No": 0.0395149973258303}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867871374684495, "res": {"Yes": 0.9867871374684495, "No": 0.013212569072300178}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.975651140965905, "res": {"Yes": 0.975651140965905, "No": 0.02434876133369974}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8433266675928641, "res": {"Yes": 0.8433266675928641, "No": 0.15667305455845468}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.987548738481989, "res": {"Yes": 0.987548738481989, "No": 0.012451004208586402}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8092545934582893, "res": {"Yes": 0.8092545934582893, "No": 0.19074519484247565}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8929906129276728, "res": {"Yes": 0.8929906129276728, "No": 0.10700911085152875}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9361090901409498, "res": {"Yes": 0.9361090901409498, "No": 0.06389065323820364}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735732811888999, "res": {"Yes": 0.9735732811888999, "No": 0.026426404452763765}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9736806284286936, "res": {"Yes": 0.9736806284286936, "No": 0.026319154418441813}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.912715104816496, "res": {"Yes": 0.912715104816496, "No": 0.08728474270212704}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982891599581637, "res": {"Yes": 0.9982891599581637, "No": 0.0017106407004779623}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994431922749886, "res": {"Yes": 0.9994431922749886, "No": 0.0005567910628437804}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9840520539237918, "res": {"Yes": 0.9840520539237918, "No": 0.0159479433761369}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8913163090188728, "res": {"Yes": 0.8913163090188728, "No": 0.10868309681812005}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9756350252934582, "res": {"Yes": 0.9756350252934582, "No": 0.024364707687214146}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9400709423318965, "res": {"Yes": 0.9400709423318965, "No": 0.059928685420624773}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9887469545560855, "res": {"Yes": 0.9887469545560855, "No": 0.011252875308609104}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9847473590331636, "res": {"Yes": 0.9847473590331636, "No": 0.015252513039951492}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9554450682614534, "res": {"Yes": 0.9554450682614534, "No": 0.04455467465690001}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6930039153170784, "res": {"Yes": 0.6930039153170784, "No": 0.3069958651660918}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917295280988945, "res": {"Yes": 0.9917295280988945, "No": 0.008270378526120357}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8846700396930907, "res": {"Yes": 0.8846700396930907, "No": 0.11532987253092937}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927970272552097, "res": {"Yes": 0.9927970272552097, "No": 0.007202968833226013}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9525627498915342, "res": {"Yes": 0.9525627498915342, "No": 0.04743706945096342}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9751906428205535, "res": {"Yes": 0.9751906428205535, "No": 0.02480922795653035}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9489805432056849, "res": {"Yes": 0.9489805432056849, "No": 0.051019151354075525}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9864516701018646, "res": {"Yes": 0.9864516701018646, "No": 0.01354824852715203}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996503102238164, "res": {"Yes": 0.996503102238164, "No": 0.003496890968390204}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9406334149111208, "res": {"Yes": 0.9406334149111208, "No": 0.05936640410953385}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.19406809041069087, "res": {"No": 0.8059317640878472, "Yes": 0.19406809041069087}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9000900365954037, "res": {"Yes": 0.9000900365954037, "No": 0.0999098199860418}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9668591894555111, "res": {"Yes": 0.9668591894555111, "No": 0.033140691539055626}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4856816067097862, "res": {"No": 0.5143183228816968, "Yes": 0.4856816067097862}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6603935194844397, "res": {"Yes": 0.6603935194844397, "No": 0.33960574762410106}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9539199050777439, "res": {"Yes": 0.9539199050777439, "No": 0.046079809348749015}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8464379939527532, "res": {"Yes": 0.8464379939527532, "No": 0.1535614908981728}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9616444511808425, "res": {"Yes": 0.9616444511808425, "No": 0.038355298463312266}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984679849350407, "res": {"Yes": 0.9984679849350407, "No": 0.0015318941541974547}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9809674955123696, "res": {"Yes": 0.9809674955123696, "No": 0.019032369338223214}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9907436323826231, "res": {"Yes": 0.9907436323826231, "No": 0.00925632293905758}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999641651360841, "res": {"Yes": 0.9999641651360841, "No": 3.578867698219314e-05}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972261994135249, "res": {"Yes": 0.9972261994135249, "No": 0.002773655926306657}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955901964633772, "res": {"Yes": 0.9955901964633772, "No": 0.004409786514965613}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9636434162086895, "res": {"Yes": 0.9636434162086895, "No": 0.0363565245018198}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9256932895407572, "res": {"Yes": 0.9256932895407572, "No": 0.07430663731174289}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8820473849370735, "res": {"Yes": 0.8820473849370735, "No": 0.11795260355423522}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9477478643289967, "res": {"Yes": 0.9477478643289967, "No": 0.052252005818669144}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6822470795625951, "res": {"Yes": 0.6822470795625951, "No": 0.3177527863341868}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4623241086434358, "res": {"No": 0.5376757745617702, "Yes": 0.4623241086434358}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9941680373034977, "res": {"Yes": 0.9941680373034977, "No": 0.005831980079530371}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9519658278029377, "res": {"Yes": 0.9519658278029377, "No": 0.048034104977968554}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.395504268169817, "res": {"No": 0.6044954112139636, "Yes": 0.395504268169817}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8873844404979351, "res": {"Yes": 0.8873844404979351, "No": 0.11261531050574888}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9245603978520769, "res": {"Yes": 0.9245603978520769, "No": 0.07543936937379499}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7842763292134344, "res": {"Yes": 0.7842763292134344, "No": 0.2157235181011357}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.991222688497798, "res": {"Yes": 0.991222688497798, "No": 0.008777216051780503}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.917949519064019, "res": {"Yes": 0.917949519064019, "No": 0.08205028401213889}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969491170309156, "res": {"Yes": 0.9969491170309156, "No": 0.0030508714739856508}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8961320496720346, "res": {"Yes": 0.8961320496720346, "No": 0.10386746162920164}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8482644842619831, "res": {"Yes": 0.8482644842619831, "No": 0.15173542496718798}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908517640801099, "res": {"Yes": 0.9908517640801099, "No": 0.009148151883872864}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.889917924652503, "res": {"Yes": 0.889917924652503, "No": 0.11008188291216854}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9915792339528483, "res": {"Yes": 0.9915792339528483, "No": 0.008420715315511787}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9881104736993328, "res": {"Yes": 0.9881104736993328, "No": 0.011889447939659277}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5280978449704877, "res": {"Yes": 0.5280978449704877, "No": 0.47190178390325016}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9622820769635283, "res": {"Yes": 0.9622820769635283, "No": 0.03771786213688704}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9748669605621709, "res": {"Yes": 0.9748669605621709, "No": 0.025132905885228145}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7943181224337191, "res": {"Yes": 0.7943181224337191, "No": 0.20568168987886182}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7675583164976041, "res": {"Yes": 0.7675583164976041, "No": 0.23244142992303327}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7973062424638084, "res": {"Yes": 0.7973062424638084, "No": 0.20269325537596436}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8668768186302751, "res": {"Yes": 0.8668768186302751, "No": 0.13312304873706865}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9843371416341586, "res": {"Yes": 0.9843371416341586, "No": 0.01566282827001778}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9111907084035329, "res": {"Yes": 0.9111907084035329, "No": 0.08880916720693294}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.32404982310387925, "res": {"No": 0.6759500068755759, "Yes": 0.32404982310387925}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9809923839544192, "res": {"Yes": 0.9809923839544192, "No": 0.0190076053182202}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9244919274445024, "res": {"Yes": 0.9244919274445024, "No": 0.07550805044507301}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955950404197029, "res": {"Yes": 0.9955950404197029, "No": 0.004404964846451813}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8061244692890672, "res": {"Yes": 0.8061244692890672, "No": 0.1938754570580273}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9787560640377707, "res": {"Yes": 0.9787560640377707, "No": 0.021243918772063927}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.893301106151156, "res": {"Yes": 0.893301106151156, "No": 0.10669855109579922}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8098378552967983, "res": {"Yes": 0.8098378552967983, "No": 0.19016168405114348}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9663035356758221, "res": {"Yes": 0.9663035356758221, "No": 0.03369640917404246}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9740987543268081, "res": {"Yes": 0.9740987543268081, "No": 0.02590118651334215}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963698318502049, "res": {"Yes": 0.9963698318502049, "No": 0.0036301885133552957}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9054321730373591, "res": {"Yes": 0.9054321730373591, "No": 0.0945675485839505}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994768903695767, "res": {"Yes": 0.9994768903695767, "No": 0.0005230983823616315}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997383407987415, "res": {"Yes": 0.9997383407987415, "No": 0.0002615581312314108}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969270825056478, "res": {"Yes": 0.9969270825056478, "No": 0.0030728564814487195}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970581306034395, "res": {"Yes": 0.9970581306034395, "No": 0.0029419030827104974}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9998596400713766, "res": {"Yes": 0.9998596400713766, "No": 0.00014026809531924166}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 9.32963187443245e-06}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992506904726997, "res": {"Yes": 0.9992506904726997, "No": 0.0007493028627765595}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991595189422366, "res": {"Yes": 0.9991595189422366, "No": 0.0008404643678549807}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99602888232471, "res": {"Yes": 0.99602888232471, "No": 0.003970941733281413}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9680617620212223, "res": {"Yes": 0.9680617620212223, "No": 0.031938008162186886}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994504560046448, "res": {"Yes": 0.9994504560046448, "No": 0.0005494586704286081}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989678411427747, "res": {"Yes": 0.9989678411427747, "No": 0.0010321523467481165}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929129037552443, "res": {"Yes": 0.9929129037552443, "No": 0.0070870386891301335}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970504305526415, "res": {"Yes": 0.9970504305526415, "No": 0.002949578500574737}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8083780469452253, "res": {"Yes": 0.8083780469452253, "No": 0.19162175041059287}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9299992772366779, "res": {"Yes": 0.9299992772366779, "No": 0.0700004925870276}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8220099494873506, "res": {"Yes": 0.8220099494873506, "No": 0.1779899483906457}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8835488962064779, "res": {"Yes": 0.8835488962064779, "No": 0.11645080148755876}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9624566209294989, "res": {"Yes": 0.9624566209294989, "No": 0.03754324539158098}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6061450046108666, "res": {"Yes": 0.6061450046108666, "No": 0.3938547470368073}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8180620577802367, "res": {"Yes": 0.8180620577802367, "No": 0.181937856138692}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940948686321134, "res": {"Yes": 0.9940948686321134, "No": 0.00590508579633353}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9320709915559994, "res": {"Yes": 0.9320709915559994, "No": 0.0679287307927196}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9473420024138909, "res": {"Yes": 0.9473420024138909, "No": 0.05265775277792038}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9866753694327594, "res": {"Yes": 0.9866753694327594, "No": 0.013324440480742787}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956723195146359, "res": {"Yes": 0.9956723195146359, "No": 0.004327721880567907}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978881360293128, "res": {"Yes": 0.9978881360293128, "No": 0.002111888939467902}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988021550896321, "res": {"Yes": 0.9988021550896321, "No": 0.001197635000202401}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988466335389631, "res": {"Yes": 0.9988466335389631, "No": 0.0011533579902265388}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9623088951764279, "res": {"Yes": 0.9623088951764279, "No": 0.037690865813434136}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983655515680585, "res": {"Yes": 0.9983655515680585, "No": 0.00163440541716381}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9481061788430357, "res": {"Yes": 0.9481061788430357, "No": 0.05189348118868066}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995907410093171, "res": {"Yes": 0.9995907410093171, "No": 0.0004092272224799422}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9175873437837, "res": {"Yes": 0.9175873437837, "No": 0.08241243032378129}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9732874927370746, "res": {"Yes": 0.9732874927370746, "No": 0.026712429434666878}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8876264135260784, "res": {"Yes": 0.8876264135260784, "No": 0.11237349263325067}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895148653103117, "res": {"Yes": 0.9895148653103117, "No": 0.010485090324766293}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9297456173333559, "res": {"Yes": 0.9297456173333559, "No": 0.070254241174492}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9791244999558345, "res": {"Yes": 0.9791244999558345, "No": 0.020875480302644363}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9998291323495401, "res": {"Yes": 0.9998291323495401, "No": 0.00017079014112950113}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975703358237201, "res": {"Yes": 0.9975703358237201, "No": 0.0024294577370730518}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997064104058621, "res": {"Yes": 0.9997064104058621, "No": 0.0002935011501395654}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.7056916099258427e-06}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973899447327789, "res": {"Yes": 0.9973899447327789, "No": 0.002609906192145914}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9146840901391037, "res": {"Yes": 0.9146840901391037, "No": 0.08531580964343992}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8575942272270474, "res": {"Yes": 0.8575942272270474, "No": 0.14240555086079246}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9783101993381635, "res": {"Yes": 0.9783101993381635, "No": 0.0216896736578244}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785997535172247, "res": {"Yes": 0.9785997535172247, "No": 0.021400178839569968}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9144879516801552, "res": {"Yes": 0.9144879516801552, "No": 0.08551187703472965}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9646904367887915, "res": {"Yes": 0.9646904367887915, "No": 0.03530935980167116}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.99714476346348, "res": {"Yes": 0.99714476346348, "No": 0.0028551111640848594}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953177888872552, "res": {"Yes": 0.9953177888872552, "No": 0.004682232279376257}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939774271381184, "res": {"Yes": 0.9939774271381184, "No": 0.006022406191110292}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977921169673184, "res": {"Yes": 0.9977921169673184, "No": 0.0022078512974601436}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7439240520035635, "res": {"Yes": 0.7439240520035635, "No": 0.25607533511521063}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.95954949567781, "res": {"Yes": 0.95954949567781, "No": 0.04045015514536792}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933513532099036, "res": {"Yes": 0.9933513532099036, "No": 0.0066485133878819444}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932541983648115, "res": {"Yes": 0.9932541983648115, "No": 0.006745754061126916}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7781408733898311, "res": {"Yes": 0.7781408733898311, "No": 0.22185854164615948}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8259000067438582, "res": {"Yes": 0.8259000067438582, "No": 0.17409951812807856}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8982096542005413, "res": {"Yes": 0.8982096542005413, "No": 0.1017901436785073}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993372243328869, "res": {"Yes": 0.9993372243328869, "No": 0.0006625698312488772}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993305570270018, "res": {"Yes": 0.9993305570270018, "No": 0.000669376416909238}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980392732410507, "res": {"Yes": 0.9980392732410507, "No": 0.0019607459331567583}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7111611658295567, "res": {"Yes": 0.7111611658295567, "No": 0.2888384626240637}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895041301217712, "res": {"Yes": 0.9895041301217712, "No": 0.0104957422925709}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8465898754825127, "res": {"Yes": 0.8465898754825127, "No": 0.15340986241053775}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9404339954314278, "res": {"Yes": 0.9404339954314278, "No": 0.059565900970969754}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7345278625874042, "res": {"Yes": 0.7345278625874042, "No": 0.26547181556476834}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9773902037116746, "res": {"Yes": 0.9773902037116746, "No": 0.022609629033815252}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919622012518382, "res": {"Yes": 0.9919622012518382, "No": 0.008037742651113114}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9752842880476248, "res": {"Yes": 0.9752842880476248, "No": 0.02471563350851583}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989894958035747, "res": {"Yes": 0.9989894958035747, "No": 0.001010498000344343}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976880311265041, "res": {"Yes": 0.9976880311265041, "No": 0.0023119082805530284}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9826914658906947, "res": {"Yes": 0.9826914658906947, "No": 0.0173085150352331}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889163236164774, "res": {"Yes": 0.9889163236164774, "No": 0.011083567200211938}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905845119196962, "res": {"Yes": 0.9905845119196962, "No": 0.009415364526416111}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9883337624859658, "res": {"Yes": 0.9883337624859658, "No": 0.01166607758674897}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954859879163495, "res": {"Yes": 0.9954859879163495, "No": 0.004513939515454012}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9390981210239003, "res": {"Yes": 0.9390981210239003, "No": 0.06090160195088578}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8566552643044583, "res": {"Yes": 0.8566552643044583, "No": 0.1433445222213357}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5103132888505276, "res": {"Yes": 0.5103132888505276, "No": 0.4896863474884269}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8647583545326689, "res": {"Yes": 0.8647583545326689, "No": 0.13524142018196192}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.852779742463175, "res": {"Yes": 0.852779742463175, "No": 0.14722007542841853}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8601342476488093, "res": {"Yes": 0.8601342476488093, "No": 0.13986563041628963}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960634202250019, "res": {"Yes": 0.9960634202250019, "No": 0.0039365416300450595}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902504272732722, "res": {"Yes": 0.9902504272732722, "No": 0.009749489054619143}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9760265461643947, "res": {"Yes": 0.9760265461643947, "No": 0.023973354369628073}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905541009467065, "res": {"Yes": 0.9905541009467065, "No": 0.009445866883260544}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3470302940875445, "res": {"No": 0.65296947277141, "Yes": 0.3470302940875445}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8947825331073251, "res": {"Yes": 0.8947825331073251, "No": 0.10521735718872478}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8284010681899392, "res": {"Yes": 0.8284010681899392, "No": 0.1715983684226486}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9870986738895835, "res": {"Yes": 0.9870986738895835, "No": 0.0129010703266278}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8622219317750349, "res": {"Yes": 0.8622219317750349, "No": 0.1377778739080224}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9871583813456868, "res": {"Yes": 0.9871583813456868, "No": 0.012841459743419699}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9931828207166542, "res": {"Yes": 0.9931828207166542, "No": 0.006817131478030229}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9825563334869747, "res": {"Yes": 0.9825563334869747, "No": 0.017443527779895978}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977544903373782, "res": {"Yes": 0.9977544903373782, "No": 0.002245505760695008}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9816231872221217, "res": {"Yes": 0.9816231872221217, "No": 0.01837686210445002}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9876295390749222, "res": {"Yes": 0.9876295390749222, "No": 0.012370351221172667}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942071421810538, "res": {"Yes": 0.9942071421810538, "No": 0.005792829032947193}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941290378505221, "res": {"Yes": 0.9941290378505221, "No": 0.005870944824014693}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962569397798416, "res": {"Yes": 0.9962569397798416, "No": 0.0037431005011232246}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974807903480434, "res": {"Yes": 0.9974807903480434, "No": 0.002519158151844244}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9994716518250796, "res": {"Yes": 0.9994716518250796, "No": 0.0005282431863944707}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9553609612679089, "res": {"Yes": 0.9553609612679089, "No": 0.04463887334119938}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999412791111388, "res": {"Yes": 0.9999412791111388, "No": 5.8636921503438496e-05}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9515669196791494, "res": {"Yes": 0.9515669196791494, "No": 0.0484328391741388}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9741884661342417, "res": {"Yes": 0.9741884661342417, "No": 0.025811343839127532}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.811675864633853, "res": {"Yes": 0.811675864633853, "No": 0.18832394260901253}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927052775908686, "res": {"Yes": 0.9927052775908686, "No": 0.007294628672119826}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8822669715007418, "res": {"Yes": 0.8822669715007418, "No": 0.11773245020954552}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9655801215893932, "res": {"Yes": 0.9655801215893932, "No": 0.03441960376608144}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874443492349384, "res": {"Yes": 0.9874443492349384, "No": 0.012555375229014936}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5785883045524469, "res": {"Yes": 0.5785883045524469, "No": 0.42141113674287534}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895466193518304, "res": {"Yes": 0.9895466193518304, "No": 0.010453277639878272}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933433522936509, "res": {"Yes": 0.9933433522936509, "No": 0.006656453791351797}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9099574671128422, "res": {"Yes": 0.9099574671128422, "No": 0.0900423197253236}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9831595202577699, "res": {"Yes": 0.9831595202577699, "No": 0.016840476076357736}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9933194795597331, "res": {"Yes": 0.9933194795597331, "No": 0.006680511650370098}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9504258325782862, "res": {"Yes": 0.9504258325782862, "No": 0.049574097970075386}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9185013174302545, "res": {"Yes": 0.9185013174302545, "No": 0.08149861929862404}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977241091757029, "res": {"Yes": 0.9977241091757029, "No": 0.002275887255938827}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7186344104837881, "res": {"Yes": 0.7186344104837881, "No": 0.28136545585300166}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8729366776200639, "res": {"Yes": 0.8729366776200639, "No": 0.12706285419910188}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755420985318292, "res": {"Yes": 0.9755420985318292, "No": 0.02445764825283084}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9680956060520128, "res": {"Yes": 0.9680956060520128, "No": 0.03190429942691232}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9215209937344241, "res": {"Yes": 0.9215209937344241, "No": 0.07847860022162374}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8494112555614337, "res": {"Yes": 0.8494112555614337, "No": 0.1505885945325313}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.864609161997388, "res": {"Yes": 0.864609161997388, "No": 0.13539047068078433}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930275054253395, "res": {"Yes": 0.9930275054253395, "No": 0.006972300857524608}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9583372054091684, "res": {"Yes": 0.9583372054091684, "No": 0.041662543135220005}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932901889867822, "res": {"Yes": 0.9932901889867822, "No": 0.0067097103221232775}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7483504160879398, "res": {"Yes": 0.7483504160879398, "No": 0.2516489797525138}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8931418894195974, "res": {"Yes": 0.8931418894195974, "No": 0.10685791758096609}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9769570754117247, "res": {"Yes": 0.9769570754117247, "No": 0.023042888087275653}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948025636555158, "res": {"Yes": 0.9948025636555158, "No": 0.005197414440665603}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9124491418843492, "res": {"Yes": 0.9124491418843492, "No": 0.0875507284409421}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9499041777510787, "res": {"Yes": 0.9499041777510787, "No": 0.05009575525148446}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7715907392742284, "res": {"Yes": 0.7715907392742284, "No": 0.22840918213870964}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9586404530609212, "res": {"Yes": 0.9586404530609212, "No": 0.04135929063170529}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966675625363656, "res": {"Yes": 0.9966675625363656, "No": 0.0033324483663497834}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8030536382047694, "res": {"Yes": 0.8030536382047694, "No": 0.19694621654121158}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9348417783770958, "res": {"Yes": 0.9348417783770958, "No": 0.0651579602143064}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9743045622199663, "res": {"Yes": 0.9743045622199663, "No": 0.025695345819459585}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972097200875749, "res": {"Yes": 0.9972097200875749, "No": 0.00279026027212464}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.956109324784305, "res": {"Yes": 0.956109324784305, "No": 0.043890393926894526}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946912062182244, "res": {"Yes": 0.9946912062182244, "No": 0.005308759961012296}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8222960751758143, "res": {"Yes": 0.8222960751758143, "No": 0.17770379283756282}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.975169664745086, "res": {"Yes": 0.975169664745086, "No": 0.024830091361211077}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995408383270004, "res": {"Yes": 0.9995408383270004, "No": 0.000458982718013165}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995753396031463, "res": {"Yes": 0.995753396031463, "No": 0.004246448467948966}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998235699211675, "res": {"Yes": 0.998235699211675, "No": 0.001764296061787313}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993798498726276, "res": {"Yes": 0.9993798498726276, "No": 0.0006199436321161101}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6970709569720499, "res": {"Yes": 0.6970709569720499, "No": 0.30292882562295065}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9027639068124962, "res": {"Yes": 0.9027639068124962, "No": 0.0972358994303691}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9756901755975963, "res": {"Yes": 0.9756901755975963, "No": 0.024309727502328553}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.972023130494302, "res": {"Yes": 0.972023130494302, "No": 0.027976796746033733}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9801019324719387, "res": {"Yes": 0.9801019324719387, "No": 0.019897997111108467}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9885673966176634, "res": {"Yes": 0.9885673966176634, "No": 0.011432541184878749}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963843861299111, "res": {"Yes": 0.9963843861299111, "No": 0.0036155926315593985}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971158417987673, "res": {"Yes": 0.9971158417987673, "No": 0.0028841348577223276}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901917527128229, "res": {"Yes": 0.9901917527128229, "No": 0.009808110828949633}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970687979860312, "res": {"Yes": 0.9970687979860312, "No": 0.0029311589150775915}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.28185619074950374, "res": {"No": 0.7181433557818427, "Yes": 0.28185619074950374}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3079095524536901, "res": {"No": 0.6920898920462752, "Yes": 0.3079095524536901}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8301819046293374, "res": {"Yes": 0.8301819046293374, "No": 0.16981793269368567}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7971733505624354, "res": {"Yes": 0.7971733505624354, "No": 0.2028265497311025}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7938662522276337, "res": {"Yes": 0.7938662522276337, "No": 0.20613318293377425}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4068811707208966, "res": {"No": 0.5931184813654646, "Yes": 0.4068811707208966}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9467273295855021, "res": {"Yes": 0.9467273295855021, "No": 0.05327253347773869}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9768081545417729, "res": {"Yes": 0.9768081545417729, "No": 0.023191837363487913}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9550175096093579, "res": {"Yes": 0.9550175096093579, "No": 0.04498241695057828}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9248322400739678, "res": {"Yes": 0.9248322400739678, "No": 0.07516764449420822}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9287198002809742, "res": {"Yes": 0.9287198002809742, "No": 0.07127994980603755}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9881307272072796, "res": {"Yes": 0.9881307272072796, "No": 0.011869161655786178}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9739878049531174, "res": {"Yes": 0.9739878049531174, "No": 0.026012000233463728}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993653234923942, "res": {"Yes": 0.9993653234923942, "No": 0.00063465792817988}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943473911538038, "res": {"Yes": 0.9943473911538038, "No": 0.0056525754829271635}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.893338688119223, "res": {"Yes": 0.893338688119223, "No": 0.10666113847461647}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.958977438926038, "res": {"Yes": 0.958977438926038, "No": 0.041022425434083576}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8805394129829706, "res": {"Yes": 0.8805394129829706, "No": 0.11946024987211248}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9487809015490754, "res": {"Yes": 0.9487809015490754, "No": 0.051218990884295754}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7448860167193991, "res": {"Yes": 0.7448860167193991, "No": 0.25511373848307983}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7718688861303116, "res": {"Yes": 0.7718688861303116, "No": 0.22813094379340892}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9838045044575512, "res": {"Yes": 0.9838045044575512, "No": 0.01619551381653221}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8754984742291593, "res": {"Yes": 0.8754984742291593, "No": 0.12450126530956286}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9441566724775462, "res": {"Yes": 0.9441566724775462, "No": 0.055843245544600115}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9036580892403534, "res": {"Yes": 0.9036580892403534, "No": 0.09634183797086336}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.21144285629588205, "res": {"No": 0.7885569968734919, "Yes": 0.21144285629588205}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8103538944526023, "res": {"Yes": 0.8103538944526023, "No": 0.1896458866967842}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9137073543803961, "res": {"Yes": 0.9137073543803961, "No": 0.08629246469003778}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9616545965833194, "res": {"Yes": 0.9616545965833194, "No": 0.03834516633197174}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824400007916304, "res": {"Yes": 0.9824400007916304, "No": 0.017559829506839597}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6553477946962929, "res": {"Yes": 0.6553477946962929, "No": 0.34465178172464206}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919663030240208, "res": {"Yes": 0.9919663030240208, "No": 0.008033668548461294}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998049839738379, "res": {"Yes": 0.998049839738379, "No": 0.0019500920546654066}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9856940831669586, "res": {"Yes": 0.9856940831669586, "No": 0.014305798992039945}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9588981865516115, "res": {"Yes": 0.9588981865516115, "No": 0.04110173270894894}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5361230518030775, "res": {"Yes": 0.5361230518030775, "No": 0.46387672543329694}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.19879709451145333, "res": {"No": 0.8012027282432528, "Yes": 0.19879709451145333}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8937915826627493, "res": {"Yes": 0.8937915826627493, "No": 0.10620835510115484}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9482730469398194, "res": {"Yes": 0.9482730469398194, "No": 0.05172689603275585}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8809058552440598, "res": {"Yes": 0.8809058552440598, "No": 0.11909399782339539}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8931165959994455, "res": {"Yes": 0.8931165959994455, "No": 0.10688305351905159}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9626203034822179, "res": {"Yes": 0.9626203034822179, "No": 0.03737941227718785}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.969862848287263, "res": {"Yes": 0.969862848287263, "No": 0.03013699476335826}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9277028427767203, "res": {"Yes": 0.9277028427767203, "No": 0.072296945376329}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9647213980605027, "res": {"Yes": 0.9647213980605027, "No": 0.03527844940199664}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9924076726357809, "res": {"Yes": 0.9924076726357809, "No": 0.0075922871172139006}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9571878273151484, "res": {"Yes": 0.9571878273151484, "No": 0.0428119932297112}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984897331026119, "res": {"Yes": 0.9984897331026119, "No": 0.001510278059112979}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9763223582587336, "res": {"Yes": 0.9763223582587336, "No": 0.023677560350804688}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956738588252317, "res": {"Yes": 0.9956738588252317, "No": 0.00432612524579121}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9143814026661262, "res": {"Yes": 0.9143814026661262, "No": 0.08561843130744878}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923882958666594, "res": {"Yes": 0.9923882958666594, "No": 0.007611682811129903}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9725985654281685, "res": {"Yes": 0.9725985654281685, "No": 0.02740117454228783}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960916884079528, "res": {"Yes": 0.9960916884079528, "No": 0.00390816509617186}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.989928709551922, "res": {"Yes": 0.989928709551922, "No": 0.010071214235774372}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9361504670771575, "res": {"Yes": 0.9361504670771575, "No": 0.06384938681969847}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9843969714638373, "res": {"Yes": 0.9843969714638373, "No": 0.015603005859287901}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990290646120844, "res": {"Yes": 0.990290646120844, "No": 0.009709210576843947}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984045249257605, "res": {"Yes": 0.9984045249257605, "No": 0.0015954644402847297}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9432515168771214, "res": {"Yes": 0.9432515168771214, "No": 0.05674831015888012}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9127297083750012, "res": {"Yes": 0.9127297083750012, "No": 0.08727015864015386}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.2212974399998391, "res": {"No": 0.7787025669102852, "Yes": 0.2212974399998391}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8500445832422828, "res": {"Yes": 0.8500445832422828, "No": 0.14995537937086711}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9562019561865271, "res": {"Yes": 0.9562019561865271, "No": 0.04379796604529564}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8098004821421685, "res": {"Yes": 0.8098004821421685, "No": 0.19019933979238096}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9391634093946714, "res": {"Yes": 0.9391634093946714, "No": 0.060835936728890386}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957041155234649, "res": {"Yes": 0.9957041155234649, "No": 0.004295701102346369}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922495384242227, "res": {"Yes": 0.9922495384242227, "No": 0.007750253705183405}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909829863133286, "res": {"Yes": 0.9909829863133286, "No": 0.009016764609457715}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9408436624477322, "res": {"Yes": 0.9408436624477322, "No": 0.05915606250885397}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.49171984765680676, "res": {"No": 0.5082798486066915, "Yes": 0.49171984765680676}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8935920034123577, "res": {"Yes": 0.8935920034123577, "No": 0.10640767193731465}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915452412162227, "res": {"Yes": 0.9915452412162227, "No": 0.008454478314115176}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9552537996626176, "res": {"Yes": 0.9552537996626176, "No": 0.04474603685869381}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9735721440559715, "res": {"Yes": 0.9735721440559715, "No": 0.026427379605079522}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7077565502495744, "res": {"Yes": 0.7077565502495744, "No": 0.2922432136334003}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8714319681557473, "res": {"Yes": 0.8714319681557473, "No": 0.12856783694185622}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998023955451834, "res": {"Yes": 0.998023955451834, "No": 0.0019759960078052008}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.974552442213166, "res": {"Yes": 0.974552442213166, "No": 0.02544735425336726}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904429932580496, "res": {"Yes": 0.9904429932580496, "No": 0.009556942741299214}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8606108510103229, "res": {"Yes": 0.8606108510103229, "No": 0.13938904089421097}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924817621074548, "res": {"Yes": 0.9924817621074548, "No": 0.007518234067853948}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8932111284625358, "res": {"Yes": 0.8932111284625358, "No": 0.10678880286931144}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9567170009951673, "res": {"Yes": 0.9567170009951673, "No": 0.043282949392023665}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993177027706853, "res": {"Yes": 0.9993177027706853, "No": 0.0006822966663878417}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9993691319309939, "res": {"Yes": 0.9993691319309939, "No": 0.0006308490168781479}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989996068272273, "res": {"Yes": 0.9989996068272273, "No": 0.0010003673464373203}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99499855906547, "res": {"Yes": 0.99499855906547, "No": 0.005001425535916831}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948155422328631, "res": {"Yes": 0.9948155422328631, "No": 0.005184452686292317}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995269016763211, "res": {"Yes": 0.9995269016763211, "No": 0.0004730456322614421}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.972970499861849, "res": {"Yes": 0.972970499861849, "No": 0.027029330169592566}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7561275349549431, "res": {"Yes": 0.7561275349549431, "No": 0.24387236129625275}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8758085168590876, "res": {"Yes": 0.8758085168590876, "No": 0.12419144409247246}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9314169268901068, "res": {"Yes": 0.9314169268901068, "No": 0.06858278659556215}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8969387342325809, "res": {"Yes": 0.8969387342325809, "No": 0.10306107554822923}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8780765520717448, "res": {"Yes": 0.8780765520717448, "No": 0.1219227416512059}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7882419445651903, "res": {"Yes": 0.7882419445651903, "No": 0.2117578656951183}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.790775499565374, "res": {"Yes": 0.790775499565374, "No": 0.20922394363886462}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.880383236753575, "res": {"Yes": 0.880383236753575, "No": 0.11961643865050381}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9427577483467116, "res": {"Yes": 0.9427577483467116, "No": 0.057242010549891455}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9932861854394148, "res": {"Yes": 0.9932861854394148, "No": 0.006713807850918336}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.964570420364914, "res": {"Yes": 0.964570420364914, "No": 0.03542949194115428}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9553374596773325, "res": {"Yes": 0.9553374596773325, "No": 0.044662475687198}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877730520729167, "res": {"Yes": 0.9877730520729167, "No": 0.012226844202358767}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9897914674305927, "res": {"Yes": 0.9897914674305927, "No": 0.01020843438393398}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9920998108326207, "res": {"Yes": 0.9920998108326207, "No": 0.007900140775376647}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9263922110129239, "res": {"Yes": 0.9263922110129239, "No": 0.07360750414153104}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9266458919276316, "res": {"Yes": 0.9266458919276316, "No": 0.07335375373778409}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962255940446221, "res": {"Yes": 0.9962255940446221, "No": 0.0037743390413311866}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8892652135557019, "res": {"Yes": 0.8892652135557019, "No": 0.11073457028373647}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7658388959229949, "res": {"Yes": 0.7658388959229949, "No": 0.2341609217957207}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992130766866801, "res": {"Yes": 0.9992130766866801, "No": 0.0007868132745683448}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925268508833381, "res": {"Yes": 0.9925268508833381, "No": 0.007472912222911474}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989191859007542, "res": {"Yes": 0.9989191859007542, "No": 0.0010805985266615899}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907945356212252, "res": {"Yes": 0.9907945356212252, "No": 0.009205277666542553}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5424759371690555, "res": {"Yes": 0.5424759371690555, "No": 0.4575238200220323}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.950707080475993, "res": {"Yes": 0.950707080475993, "No": 0.049292856845662605}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8990860784356577, "res": {"Yes": 0.8990860784356577, "No": 0.1009139303702507}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9497803819573937, "res": {"Yes": 0.9497803819573937, "No": 0.05021952418044463}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4190403554394057, "res": {"No": 0.5809593442801781, "Yes": 0.4190403554394057}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9711465200883197, "res": {"Yes": 0.9711465200883197, "No": 0.02885325406041144}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995551290044593, "res": {"Yes": 0.9995551290044593, "No": 0.0004448217134543776}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999637428784835, "res": {"Yes": 0.999637428784835, "No": 0.00036248480564865797}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885560973568953, "res": {"Yes": 0.9885560973568953, "No": 0.011443664739132622}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963305433710721, "res": {"Yes": 0.9963305433710721, "No": 0.003669473821803892}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9588003225613794, "res": {"Yes": 0.9588003225613794, "No": 0.0411995683344796}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.997979546769121, "res": {"Yes": 0.997979546769121, "No": 0.0020204904500196984}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950484833756131, "res": {"Yes": 0.9950484833756131, "No": 0.004951463943322724}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9897327201015345, "res": {"Yes": 0.9897327201015345, "No": 0.010267193130917184}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9647728779325211, "res": {"Yes": 0.9647728779325211, "No": 0.03522708612844907}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7769168721405202, "res": {"Yes": 0.7769168721405202, "No": 0.22308261617729222}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972099540330027, "res": {"Yes": 0.9972099540330027, "No": 0.002790006090991977}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938430646859923, "res": {"Yes": 0.9938430646859923, "No": 0.006156843817688658}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995239229806445, "res": {"Yes": 0.9995239229806445, "No": 0.00047606364723167997}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9857884866239996, "res": {"Yes": 0.9857884866239996, "No": 0.014211373276320638}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9910193947060626, "res": {"Yes": 0.9910193947060626, "No": 0.008980536990133849}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9839205913833081, "res": {"Yes": 0.9839205913833081, "No": 0.01607940529591714}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915857992206906, "res": {"Yes": 0.9915857992206906, "No": 0.008414120269008929}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9024561384603996, "res": {"Yes": 0.9024561384603996, "No": 0.09754374840744325}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969402296679306, "res": {"Yes": 0.9969402296679306, "No": 0.0030597012454196}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9735572417012179, "res": {"Yes": 0.9735572417012179, "No": 0.026442608804819787}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9764861364011798, "res": {"Yes": 0.9764861364011798, "No": 0.023513832828671574}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944152917876128, "res": {"Yes": 0.9944152917876128, "No": 0.005584672988058479}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9141359927766639, "res": {"Yes": 0.9141359927766639, "No": 0.08586394245421666}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9586158000213924, "res": {"Yes": 0.9586158000213924, "No": 0.041384109513834616}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9720917324754431, "res": {"Yes": 0.9720917324754431, "No": 0.027908100270976968}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8539950768694968, "res": {"Yes": 0.8539950768694968, "No": 0.14600438842792665}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7368486923303135, "res": {"Yes": 0.7368486923303135, "No": 0.2631507545001168}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9100320912346664, "res": {"Yes": 0.9100320912346664, "No": 0.08996764259673827}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7099358252526661, "res": {"Yes": 0.7099358252526661, "No": 0.2900640263073048}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9549105421790014, "res": {"Yes": 0.9549105421790014, "No": 0.04508921032541591}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8684755115121477, "res": {"Yes": 0.8684755115121477, "No": 0.13152393877709795}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989868081244918, "res": {"Yes": 0.989868081244918, "No": 0.01013174717776469}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9574100278828336, "res": {"Yes": 0.9574100278828336, "No": 0.04258977832162929}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9617644093827982, "res": {"Yes": 0.9617644093827982, "No": 0.038235319358633967}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6578199482369051, "res": {"Yes": 0.6578199482369051, "No": 0.3421798167919615}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9250581394365388, "res": {"Yes": 0.9250581394365388, "No": 0.07494174504442484}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9393589072114548, "res": {"Yes": 0.9393589072114548, "No": 0.06064097859487206}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8268093686681752, "res": {"Yes": 0.8268093686681752, "No": 0.17319035005911465}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9268841446772247, "res": {"Yes": 0.9268841446772247, "No": 0.07311556554073831}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.969837390691478, "res": {"Yes": 0.969837390691478, "No": 0.030162429058818073}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9835257229458779, "res": {"Yes": 0.9835257229458779, "No": 0.01647408069747843}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9776803469321481, "res": {"Yes": 0.9776803469321481, "No": 0.02231936704656354}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.966233752720637, "res": {"Yes": 0.966233752720637, "No": 0.03376603038695332}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9670821148686732, "res": {"Yes": 0.9670821148686732, "No": 0.03291761019921101}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9582096755050804, "res": {"Yes": 0.9582096755050804, "No": 0.04179005861144856}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9774609194735995, "res": {"Yes": 0.9774609194735995, "No": 0.022539016609603862}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9767485612941749, "res": {"Yes": 0.9767485612941749, "No": 0.023251365906679977}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9295060945847639, "res": {"Yes": 0.9295060945847639, "No": 0.07049381524735668}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9609405303665404, "res": {"Yes": 0.9609405303665404, "No": 0.03905931073510414}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.971772467362472, "res": {"Yes": 0.971772467362472, "No": 0.028227149950391155}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897140430316015, "res": {"Yes": 0.9897140430316015, "No": 0.010285696892806196}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968742322160704, "res": {"Yes": 0.9968742322160704, "No": 0.0031255969188582187}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9495302076169477, "res": {"Yes": 0.9495302076169477, "No": 0.05046955983437381}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.984144749104457, "res": {"Yes": 0.984144749104457, "No": 0.015854999540075417}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9041378601961106, "res": {"Yes": 0.9041378601961106, "No": 0.09586203083963935}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9353848781561871, "res": {"Yes": 0.9353848781561871, "No": 0.06461496103748869}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9843749153123906, "res": {"Yes": 0.9843749153123906, "No": 0.015624991927496954}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8851149282289558, "res": {"Yes": 0.8851149282289558, "No": 0.11488495989968157}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8931269651433178, "res": {"Yes": 0.8931269651433178, "No": 0.10687284667482509}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8448968585183785, "res": {"Yes": 0.8448968585183785, "No": 0.15510293278255238}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6825181219280476, "res": {"Yes": 0.6825181219280476, "No": 0.3174815412927754}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7707448839448671, "res": {"Yes": 0.7707448839448671, "No": 0.22925455474884165}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.705936703928718, "res": {"Yes": 0.705936703928718, "No": 0.294062749718698}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7945344129280218, "res": {"Yes": 0.7945344129280218, "No": 0.20546552924832226}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5852427093585674, "res": {"Yes": 0.5852427093585674, "No": 0.4147570715111133}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9903506159769595, "res": {"Yes": 0.9903506159769595, "No": 0.009649279785918907}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845423136392514, "res": {"Yes": 0.9845423136392514, "No": 0.015457669361514248}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7790244478273943, "res": {"Yes": 0.7790244478273943, "No": 0.22097548747914952}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7144940779714697, "res": {"Yes": 0.7144940779714697, "No": 0.28550572429985915}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.947651291846674, "res": {"Yes": 0.947651291846674, "No": 0.0523486253942412}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.974367709919259, "res": {"Yes": 0.974367709919259, "No": 0.025632200137902846}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9934048821844653, "res": {"Yes": 0.9934048821844653, "No": 0.006595117344644749}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981788018975124, "res": {"Yes": 0.9981788018975124, "No": 0.0018212037416658854}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996667353749963, "res": {"Yes": 0.9996667353749963, "No": 0.0003332158765824108}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9400086853751274, "res": {"Yes": 0.9400086853751274, "No": 0.0599910436741335}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9598929511083601, "res": {"Yes": 0.9598929511083601, "No": 0.040106875520620346}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971800867457908, "res": {"Yes": 0.9971800867457908, "No": 0.0028198922612287084}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9783753618590413, "res": {"Yes": 0.9783753618590413, "No": 0.02162452914592686}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7121276713151912, "res": {"Yes": 0.7121276713151912, "No": 0.28787160909465676}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9961596952038694, "res": {"Yes": 0.9961596952038694, "No": 0.003840329917720611}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.996396108262613, "res": {"Yes": 0.996396108262613, "No": 0.0036039024743262735}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947723671804416, "res": {"Yes": 0.9947723671804416, "No": 0.005227598417299345}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993369861408878, "res": {"Yes": 0.9993369861408878, "No": 0.0006629944094606574}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997852286544106, "res": {"Yes": 0.997852286544106, "No": 0.002147640674674677}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8798340049865453, "res": {"Yes": 0.8798340049865453, "No": 0.12016589891530263}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7198566384943451, "res": {"Yes": 0.7198566384943451, "No": 0.28014288567291645}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7799173160355997, "res": {"Yes": 0.7799173160355997, "No": 0.22008230256951433}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6515946974664427, "res": {"Yes": 0.6515946974664427, "No": 0.34840516820525275}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.29750968497158603, "res": {"No": 0.7024899176337533, "Yes": 0.29750968497158603}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9496350158984488, "res": {"Yes": 0.9496350158984488, "No": 0.050364737578257726}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9113016836101302, "res": {"Yes": 0.9113016836101302, "No": 0.08869810092375771}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9527749206723005, "res": {"Yes": 0.9527749206723005, "No": 0.04722498893020458}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.831297488185202, "res": {"Yes": 0.831297488185202, "No": 0.16870224194177422}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5897502716945571, "res": {"Yes": 0.5897502716945571, "No": 0.4102493226827062}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9657745472005341, "res": {"Yes": 0.9657745472005341, "No": 0.03422541096968336}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970737754656016, "res": {"Yes": 0.9970737754656016, "No": 0.0029262123662064745}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998814441782969, "res": {"Yes": 0.9998814441782969, "No": 0.00011847127877179803}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959424407292004, "res": {"Yes": 0.9959424407292004, "No": 0.004057553797026839}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986511533125464, "res": {"Yes": 0.9986511533125464, "No": 0.0013488114253510529}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7803555966852554, "res": {"Yes": 0.7803555966852554, "No": 0.21964387373071617}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5658064615377761, "res": {"Yes": 0.5658064615377761, "No": 0.43419303181369046}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9744497609811879, "res": {"Yes": 0.9744497609811879, "No": 0.02554991216774439}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8380984360040048, "res": {"Yes": 0.8380984360040048, "No": 0.1619011200764094}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8287253181058075, "res": {"Yes": 0.8287253181058075, "No": 0.17127435379660247}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.967016897050016, "res": {"Yes": 0.967016897050016, "No": 0.03298288461798073}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990321925679982, "res": {"Yes": 0.9990321925679982, "No": 0.0009677323566330146}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990958555851012, "res": {"Yes": 0.9990958555851012, "No": 0.0009041366929387357}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985804168627472, "res": {"Yes": 0.9985804168627472, "No": 0.0014195647051964333}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972637803915159, "res": {"Yes": 0.9972637803915159, "No": 0.002736200499297062}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9350842750611614, "res": {"Yes": 0.9350842750611614, "No": 0.06491556847417448}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990375515209264, "res": {"Yes": 0.9990375515209264, "No": 0.000962418086845537}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933687583681207, "res": {"Yes": 0.9933687583681207, "No": 0.006631203287000167}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.937726800370867, "res": {"Yes": 0.937726800370867, "No": 0.06227308307010947}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6770111747015215, "res": {"Yes": 0.6770111747015215, "No": 0.3229887455063905}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.738716890160419, "res": {"Yes": 0.738716890160419, "No": 0.2612830699953239}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9601833405643843, "res": {"Yes": 0.9601833405643843, "No": 0.039816523083184965}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45968902533133243, "res": {"No": 0.5403108483236895, "Yes": 0.45968902533133243}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8748978345113714, "res": {"Yes": 0.8748978345113714, "No": 0.12510210939259359}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9478624520768735, "res": {"Yes": 0.9478624520768735, "No": 0.052137454090817645}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7921224425006032, "res": {"Yes": 0.7921224425006032, "No": 0.2078771913194276}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8559995744314839, "res": {"Yes": 0.8559995744314839, "No": 0.1440001698345838}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8942757197537208, "res": {"Yes": 0.8942757197537208, "No": 0.10572409030621717}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.903144216610402, "res": {"Yes": 0.903144216610402, "No": 0.09685550018333028}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7125446087694162, "res": {"Yes": 0.7125446087694162, "No": 0.28745510139536196}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9991976035223624, "res": {"Yes": 0.9991976035223624, "No": 0.0008023561270238679}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9269912801379054, "res": {"Yes": 0.9269912801379054, "No": 0.0730086245712975}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9763262245029275, "res": {"Yes": 0.9763262245029275, "No": 0.023673682284041573}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952938134634647, "res": {"Yes": 0.9952938134634647, "No": 0.004706149129276646}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.982918636363226, "res": {"Yes": 0.982918636363226, "No": 0.017081402165370383}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8462439110463765, "res": {"Yes": 0.8462439110463765, "No": 0.15375603823400152}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9180651512678036, "res": {"Yes": 0.9180651512678036, "No": 0.08193455992698175}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9385460842552331, "res": {"Yes": 0.9385460842552331, "No": 0.06145371433276376}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9009172121514821, "res": {"Yes": 0.9009172121514821, "No": 0.09908269247226867}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9564390729879427, "res": {"Yes": 0.9564390729879427, "No": 0.043560696134323375}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9945362555443266, "res": {"Yes": 0.9945362555443266, "No": 0.005463694880802884}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999937703220525, "res": {"Yes": 0.999937703220525, "No": 6.22508106633397e-05}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998427193399001, "res": {"Yes": 0.9998427193399001, "No": 0.00015718784544666652}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970106141387369, "res": {"Yes": 0.9970106141387369, "No": 0.0029894107039023497}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954499559576412, "res": {"Yes": 0.9954499559576412, "No": 0.004550027509470875}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9271468979548274, "res": {"Yes": 0.9271468979548274, "No": 0.07285254588542067}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970031414724871, "res": {"Yes": 0.9970031414724871, "No": 0.0029968336430881153}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9556640660784171, "res": {"Yes": 0.9556640660784171, "No": 0.04433580451686707}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9715274735969133, "res": {"Yes": 0.9715274735969133, "No": 0.02847240802872799}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9819399727911046, "res": {"Yes": 0.9819399727911046, "No": 0.01805993193186723}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8087401277840872, "res": {"Yes": 0.8087401277840872, "No": 0.19125979345092875}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.918237396446274, "res": {"Yes": 0.918237396446274, "No": 0.08176226907807266}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9626036310429454, "res": {"Yes": 0.9626036310429454, "No": 0.03739616200554704}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943895930475338, "res": {"Yes": 0.9943895930475338, "No": 0.005610345359130603}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976602719406811, "res": {"Yes": 0.9976602719406811, "No": 0.002339718864982892}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.012247915741726316, "res": {"No": 0.9877520013718292, "Yes": 0.012247915741726316}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8951486948586944, "res": {"Yes": 0.8951486948586944, "No": 0.10485122902054979}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9704480702964484, "res": {"Yes": 0.9704480702964484, "No": 0.02955185388124351}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.21616013072717638, "res": {"No": 0.7838397892190816, "Yes": 0.21616013072717638}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.927306686947575, "res": {"Yes": 0.927306686947575, "No": 0.07269325334904983}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.31565093879768386, "res": {"No": 0.6843485808818905, "Yes": 0.31565093879768386}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8690811542390346, "res": {"Yes": 0.8690811542390346, "No": 0.13091814479153643}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35136303579901124, "res": {"No": 0.6486364556581327, "Yes": 0.35136303579901124}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9697484897727091, "res": {"Yes": 0.9697484897727091, "No": 0.030251379265228143}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9704011386217367, "res": {"Yes": 0.9704011386217367, "No": 0.029598482081424363}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.939777111744273, "res": {"Yes": 0.939777111744273, "No": 0.06022272537368642}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867615918536986, "res": {"Yes": 0.9867615918536986, "No": 0.013238209126116014}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9752404002668829, "res": {"Yes": 0.9752404002668829, "No": 0.024759153994117364}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912858244551345, "res": {"Yes": 0.9912858244551345, "No": 0.008713985633125775}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871443213489898, "res": {"Yes": 0.9871443213489898, "No": 0.012855339868380115}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9137363102252594, "res": {"Yes": 0.9137363102252594, "No": 0.08626361331406436}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8682962679812816, "res": {"Yes": 0.8682962679812816, "No": 0.13170364422009115}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975832720006823, "res": {"Yes": 0.9975832720006823, "No": 0.002416697355588708}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9406877305338959, "res": {"Yes": 0.9406877305338959, "No": 0.05931210276250393}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9798483521455795, "res": {"Yes": 0.9798483521455795, "No": 0.02015169449662193}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7801992603002057, "res": {"Yes": 0.7801992603002057, "No": 0.2198002059554695}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6349091253599382, "res": {"Yes": 0.6349091253599382, "No": 0.3650902849352892}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8994717971517426, "res": {"Yes": 0.8994717971517426, "No": 0.10052757610664206}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.532409712374921, "res": {"Yes": 0.532409712374921, "No": 0.46758987628165577}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654897043898997, "res": {"Yes": 0.8654897043898997, "No": 0.13450960094461603}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05137121411876457, "res": {"No": 0.9486285348281381, "Yes": 0.05137121411876457}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6717512706072826, "res": {"Yes": 0.6717512706072826, "No": 0.32824824905326766}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6131340551030668, "res": {"Yes": 0.6131340551030668, "No": 0.38686553542907837}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7708749966624814, "res": {"Yes": 0.7708749966624814, "No": 0.2291244896984704}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.40550286098560717, "res": {"No": 0.5944968270735653, "Yes": 0.40550286098560717}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.68925217739811, "res": {"Yes": 0.68925217739811, "No": 0.31074734266111365}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7612317758611947, "res": {"Yes": 0.7612317758611947, "No": 0.23876771581683007}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9071244787514621, "res": {"Yes": 0.9071244787514621, "No": 0.09287528567875825}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960554942810829, "res": {"Yes": 0.9960554942810829, "No": 0.0039445066087357376}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9890240560950326, "res": {"Yes": 0.9890240560950326, "No": 0.010975673007863616}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8175916437886354, "res": {"Yes": 0.8175916437886354, "No": 0.18240778889106055}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.988275195562556, "res": {"Yes": 0.988275195562556, "No": 0.011724545836814978}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979943877335121, "res": {"Yes": 0.9979943877335121, "No": 0.0020054696625379422}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975668948105385, "res": {"Yes": 0.9975668948105385, "No": 0.0024329365364624896}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9823579655645879, "res": {"Yes": 0.9823579655645879, "No": 0.01764188455369774}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9923509691730171, "res": {"Yes": 0.9923509691730171, "No": 0.007648888009075631}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7898475474935169, "res": {"Yes": 0.7898475474935169, "No": 0.21015200011866011}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9862670364541417, "res": {"Yes": 0.9862670364541417, "No": 0.013732753954003168}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9828957179165777, "res": {"Yes": 0.9828957179165777, "No": 0.017104279219938446}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6863188934591167, "res": {"Yes": 0.6863188934591167, "No": 0.313680759419591}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9941374004991618, "res": {"Yes": 0.9941374004991618, "No": 0.005862490885049184}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897027113655369, "res": {"Yes": 0.9897027113655369, "No": 0.010297042097388617}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672092268240096, "res": {"Yes": 0.9672092268240096, "No": 0.032790579998767305}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978007778404818, "res": {"Yes": 0.9978007778404818, "No": 0.0021991870711902364}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9808994746417411, "res": {"Yes": 0.9808994746417411, "No": 0.019100438234081845}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9902272834107462, "res": {"Yes": 0.9902272834107462, "No": 0.00977264620059764}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9333513482283446, "res": {"Yes": 0.9333513482283446, "No": 0.06664838924608614}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875917491412407, "res": {"Yes": 0.9875917491412407, "No": 0.01240815948238773}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.974317674498997, "res": {"Yes": 0.974317674498997, "No": 0.025682265080007212}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9696282813824566, "res": {"Yes": 0.9696282813824566, "No": 0.030371573204452034}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9867976971341069, "res": {"Yes": 0.9867976971341069, "No": 0.013202025687361792}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889034974549379, "res": {"Yes": 0.9889034974549379, "No": 0.011096412734154735}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7387378478558764, "res": {"Yes": 0.7387378478558764, "No": 0.2612615672835121}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9639271141097229, "res": {"Yes": 0.9639271141097229, "No": 0.03607266148947203}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9610784803977725, "res": {"Yes": 0.9610784803977725, "No": 0.03892107505002309}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8878875916343537, "res": {"Yes": 0.8878875916343537, "No": 0.11211193355728671}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.1747543684320715, "res": {"No": 0.8252454431874316, "Yes": 0.1747543684320715}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8480962137630944, "res": {"Yes": 0.8480962137630944, "No": 0.15190338275655005}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8349594902323113, "res": {"Yes": 0.8349594902323113, "No": 0.16504039778304633}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5220097881657073, "res": {"Yes": 0.5220097881657073, "No": 0.47798995564807945}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7572881580882523, "res": {"Yes": 0.7572881580882523, "No": 0.24271139897645488}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9537195943466629, "res": {"Yes": 0.9537195943466629, "No": 0.046280271972469415}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9434367377792067, "res": {"Yes": 0.9434367377792067, "No": 0.05656319897665649}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9331115133513402, "res": {"Yes": 0.9331115133513402, "No": 0.06688822073975266}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7265587733007419, "res": {"Yes": 0.7265587733007419, "No": 0.27344091865462944}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9763889554780628, "res": {"Yes": 0.9763889554780628, "No": 0.02361098521591348}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8001690109017818, "res": {"Yes": 0.8001690109017818, "No": 0.19983079410380458}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9412161860769251, "res": {"Yes": 0.9412161860769251, "No": 0.05878372782773691}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8426253621674938, "res": {"Yes": 0.8426253621674938, "No": 0.15737443134497625}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9484982098094935, "res": {"Yes": 0.9484982098094935, "No": 0.05150150234450358}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6851457428953142, "res": {"Yes": 0.6851457428953142, "No": 0.3148538235765361}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9072624229585594, "res": {"Yes": 0.9072624229585594, "No": 0.0927371622011641}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9919746335920135, "res": {"Yes": 0.9919746335920135, "No": 0.0080253154686781}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7966687640734309, "res": {"Yes": 0.7966687640734309, "No": 0.2033308751463801}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.909261797699887, "res": {"Yes": 0.909261797699887, "No": 0.09073797695123327}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5279031395111754, "res": {"Yes": 0.5279031395111754, "No": 0.4720968470575825}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9095491789258572, "res": {"Yes": 0.9095491789258572, "No": 0.09045074776597928}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6559241839264534, "res": {"Yes": 0.6559241839264534, "No": 0.3440756957604522}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.948555497985871, "res": {"Yes": 0.948555497985871, "No": 0.051444331381621196}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.849259564257627, "res": {"Yes": 0.849259564257627, "No": 0.15074043421726682}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9107785734930893, "res": {"Yes": 0.9107785734930893, "No": 0.08922119103089135}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9446373459905474, "res": {"Yes": 0.9446373459905474, "No": 0.05536248946633428}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8590167566746161, "res": {"Yes": 0.8590167566746161, "No": 0.14098316399838481}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9281544507933382, "res": {"Yes": 0.9281544507933382, "No": 0.0718454065217831}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9394006720368946, "res": {"Yes": 0.9394006720368946, "No": 0.06059927801025714}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7100215764800525, "res": {"Yes": 0.7100215764800525, "No": 0.28997844104186854}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5269731397268222, "res": {"Yes": 0.5269731397268222, "No": 0.473026626151259}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5259963528572129, "res": {"Yes": 0.5259963528572129, "No": 0.47400320296488835}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6541165450244361, "res": {"Yes": 0.6541165450244361, "No": 0.3458832806492795}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8166074066745235, "res": {"Yes": 0.8166074066745235, "No": 0.18339242925016683}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9869015995729371, "res": {"Yes": 0.9869015995729371, "No": 0.013098119613922002}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8905626277299417, "res": {"Yes": 0.8905626277299417, "No": 0.10943658827298858}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9631440863483989, "res": {"Yes": 0.9631440863483989, "No": 0.036855047334413714}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9038752101392307, "res": {"Yes": 0.9038752101392307, "No": 0.09612414940866548}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9611105242495716, "res": {"Yes": 0.9611105242495716, "No": 0.03888913007210499}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9798350694113483, "res": {"Yes": 0.9798350694113483, "No": 0.02016472476287289}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998374752085851, "res": {"Yes": 0.9998374752085851, "No": 0.0001624131760849217}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970445062959936, "res": {"Yes": 0.9970445062959936, "No": 0.00295519216435462}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992631865804186, "res": {"Yes": 0.9992631865804186, "No": 0.0007366265578207618}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916614045705568, "res": {"Yes": 0.9916614045705568, "No": 0.008338361039243566}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8125908139571301, "res": {"Yes": 0.8125908139571301, "No": 0.18740901587679404}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988383099848774, "res": {"Yes": 0.9988383099848774, "No": 0.0011615917688081346}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8990430078496621, "res": {"Yes": 0.8990430078496621, "No": 0.10095683800142193}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8784440776603435, "res": {"Yes": 0.8784440776603435, "No": 0.12155574680348984}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9265307540627644, "res": {"Yes": 0.9265307540627644, "No": 0.07346898019571867}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.012362201845232205, "res": {"No": 0.9876376850769546, "Yes": 0.012362201845232205}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9544384785554316, "res": {"Yes": 0.9544384785554316, "No": 0.04556146434029988}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9631197566729309, "res": {"Yes": 0.9631197566729309, "No": 0.036880135729483045}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7820080299588255, "res": {"Yes": 0.7820080299588255, "No": 0.217991567093194}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9825237642494378, "res": {"Yes": 0.9825237642494378, "No": 0.017476258085602842}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9791515838932106, "res": {"Yes": 0.9791515838932106, "No": 0.020848124225255563}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990618240624914, "res": {"Yes": 0.9990618240624914, "No": 0.0009380005712028245}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999793858390102, "res": {"Yes": 0.999793858390102, "No": 0.00020609630467411478}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922321856199077, "res": {"Yes": 0.9922321856199077, "No": 0.0077675793585998965}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854688630176149, "res": {"Yes": 0.9854688630176149, "No": 0.014530912532995084}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8160153605432856, "res": {"Yes": 0.8160153605432856, "No": 0.18398443819076737}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978159622886955, "res": {"Yes": 0.9978159622886955, "No": 0.0021840297486838654}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995350033426038, "res": {"Yes": 0.995350033426038, "No": 0.004649952846369439}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9667823030357064, "res": {"Yes": 0.9667823030357064, "No": 0.033217534330264496}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985596152513155, "res": {"Yes": 0.9985596152513155, "No": 0.0014403444393550331}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.976567415654282, "res": {"Yes": 0.976567415654282, "No": 0.023432510107461862}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9693755050942683, "res": {"Yes": 0.9693755050942683, "No": 0.03062422856955682}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9709450921732931, "res": {"Yes": 0.9709450921732931, "No": 0.02905478770911257}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9282852889530995, "res": {"Yes": 0.9282852889530995, "No": 0.0717145086274253}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7767171680220976, "res": {"Yes": 0.7767171680220976, "No": 0.22328234216457316}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9926579282667084, "res": {"Yes": 0.9926579282667084, "No": 0.0073419045098898}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9646101296527597, "res": {"Yes": 0.9646101296527597, "No": 0.03538962078490216}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832091769664704, "res": {"Yes": 0.9832091769664704, "No": 0.016790651597161695}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990351779639617, "res": {"Yes": 0.990351779639617, "No": 0.00964814799178104}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9881463259620599, "res": {"Yes": 0.9881463259620599, "No": 0.011853472693846413}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9383163797515663, "res": {"Yes": 0.9383163797515663, "No": 0.06168315499585315}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984644180143827, "res": {"Yes": 0.9984644180143827, "No": 0.0015355510072142325}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9891112879073759, "res": {"Yes": 0.9891112879073759, "No": 0.010888599004912371}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9171239746071133, "res": {"Yes": 0.9171239746071133, "No": 0.08287575669166214}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9239856165185302, "res": {"Yes": 0.9239856165185302, "No": 0.0760142086689149}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9538835671450129, "res": {"Yes": 0.9538835671450129, "No": 0.04611639741148107}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8235543193280325, "res": {"Yes": 0.8235543193280325, "No": 0.1764452410066764}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7758087541581813, "res": {"Yes": 0.7758087541581813, "No": 0.2241906215389018}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996732894015348, "res": {"Yes": 0.9996732894015348, "No": 0.000326653690080553}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9468755532444558, "res": {"Yes": 0.9468755532444558, "No": 0.053124200567072725}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9039826466118862, "res": {"Yes": 0.9039826466118862, "No": 0.09601720354265687}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5259003146917326, "res": {"Yes": 0.5259003146917326, "No": 0.47409929315254085}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9011226681234616, "res": {"Yes": 0.9011226681234616, "No": 0.09887722005484906}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.952557353638841, "res": {"Yes": 0.952557353638841, "No": 0.04744258670296256}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5193059606280852, "res": {"Yes": 0.5193059606280852, "No": 0.4806937384821052}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3630775613173628, "res": {"No": 0.6369220827279275, "Yes": 0.3630775613173628}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8457110213982487, "res": {"Yes": 0.8457110213982487, "No": 0.15428827667166642}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8968977002241498, "res": {"Yes": 0.8968977002241498, "No": 0.10310196798737825}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9264316020471836, "res": {"Yes": 0.9264316020471836, "No": 0.07356823029772817}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8562373099601696, "res": {"Yes": 0.8562373099601696, "No": 0.14376239168411697}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8919334605326042, "res": {"Yes": 0.8919334605326042, "No": 0.10806644603243827}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993773482281185, "res": {"Yes": 0.9993773482281185, "No": 0.0006226535458755211}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998766124151556, "res": {"Yes": 0.998766124151556, "No": 0.0012338886408426561}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9892350483081422, "res": {"Yes": 0.9892350483081422, "No": 0.01076481859153349}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989319161080835, "res": {"Yes": 0.9989319161080835, "No": 0.0010680065558784568}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6615478251537934, "res": {"Yes": 0.6615478251537934, "No": 0.3384518980422364}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867684449367513, "res": {"Yes": 0.9867684449367513, "No": 0.013231331016264827}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8844045988909364, "res": {"Yes": 0.8844045988909364, "No": 0.1155951590905529}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8358756675367239, "res": {"Yes": 0.8358756675367239, "No": 0.16412407209882288}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.010387743265639413, "res": {"No": 0.9896119850230334, "Yes": 0.010387743265639413}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9704225779309207, "res": {"Yes": 0.9704225779309207, "No": 0.029577356308016823}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986585265809607, "res": {"Yes": 0.9986585265809607, "No": 0.0013414381423104645}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9747505128350497, "res": {"Yes": 0.9747505128350497, "No": 0.025248960308740093}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9084681168845302, "res": {"Yes": 0.9084681168845302, "No": 0.09153169255046507}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9006037565137024, "res": {"Yes": 0.9006037565137024, "No": 0.09939608027005624}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9265465052194679, "res": {"Yes": 0.9265465052194679, "No": 0.07345340642279545}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9805150383828184, "res": {"Yes": 0.9805150383828184, "No": 0.019484977401143785}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9402041278557204, "res": {"Yes": 0.9402041278557204, "No": 0.05979580924477787}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.454295278128822, "res": {"No": 0.5457046574547513, "Yes": 0.454295278128822}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7685673358484852, "res": {"Yes": 0.7685673358484852, "No": 0.23143222798515112}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8507153459159659, "res": {"Yes": 0.8507153459159659, "No": 0.14928461907577073}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.744594503043196, "res": {"Yes": 0.744594503043196, "No": 0.255405270331374}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9636762895372475, "res": {"Yes": 0.9636762895372475, "No": 0.03632365852529514}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8866294273374254, "res": {"Yes": 0.8866294273374254, "No": 0.11337042774044655}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9072562408934718, "res": {"Yes": 0.9072562408934718, "No": 0.09274359838355382}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5888262325472728, "res": {"Yes": 0.5888262325472728, "No": 0.4111736037979572}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9637308794904812, "res": {"Yes": 0.9637308794904812, "No": 0.03626881492204022}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9336822239145358, "res": {"Yes": 0.9336822239145358, "No": 0.06631764356991046}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9315007069533383, "res": {"Yes": 0.9315007069533383, "No": 0.06849900906657366}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8768704298493799, "res": {"Yes": 0.8768704298493799, "No": 0.12312940542850352}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9651199836341275, "res": {"Yes": 0.9651199836341275, "No": 0.03487984931668005}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8202084202362521, "res": {"Yes": 0.8202084202362521, "No": 0.17979139802441618}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732165825668947, "res": {"Yes": 0.9732165825668947, "No": 0.026783126445382843}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9643056310133103, "res": {"Yes": 0.9643056310133103, "No": 0.03569430595415675}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9122675552028381, "res": {"Yes": 0.9122675552028381, "No": 0.08773220756388922}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4989384188322311, "res": {"No": 0.5010612800039712, "Yes": 0.4989384188322311}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.927689706597471, "res": {"Yes": 0.927689706597471, "No": 0.07231027816230146}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948835070693574, "res": {"Yes": 0.9948835070693574, "No": 0.005116489600404355}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974283643349613, "res": {"Yes": 0.9974283643349613, "No": 0.0025716411605791432}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877409499703863, "res": {"Yes": 0.9877409499703863, "No": 0.012258905825121237}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.577705474423742, "res": {"Yes": 0.577705474423742, "No": 0.42229413726763376}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8682287692531182, "res": {"Yes": 0.8682287692531182, "No": 0.13177080385545104}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8802286902390205, "res": {"Yes": 0.8802286902390205, "No": 0.11977075958647002}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9726250729906876, "res": {"Yes": 0.9726250729906876, "No": 0.027374649351640565}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9069297998891885, "res": {"Yes": 0.9069297998891885, "No": 0.09306987722069608}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9891909511710472, "res": {"Yes": 0.9891909511710472, "No": 0.010808936859930514}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9425842412665684, "res": {"Yes": 0.9425842412665684, "No": 0.05741559630116154}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9906731890518171, "res": {"Yes": 0.9906731890518171, "No": 0.009326724477376924}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9355805951762248, "res": {"Yes": 0.9355805951762248, "No": 0.0644192678759549}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933600614627119, "res": {"Yes": 0.9933600614627119, "No": 0.006639931047236815}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.972940481266101, "res": {"Yes": 0.972940481266101, "No": 0.027059341265858808}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887982362579621, "res": {"Yes": 0.9887982362579621, "No": 0.011201506508939156}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9423533072015045, "res": {"Yes": 0.9423533072015045, "No": 0.05764641614159786}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9153100304536712, "res": {"Yes": 0.9153100304536712, "No": 0.0846897669375591}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8994583906245185, "res": {"Yes": 0.8994583906245185, "No": 0.10054139959870004}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7296714169050768, "res": {"Yes": 0.7296714169050768, "No": 0.2703280655435755}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7700196461341198, "res": {"Yes": 0.7700196461341198, "No": 0.22998002998056694}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5510742080725837, "res": {"Yes": 0.5510742080725837, "No": 0.44892515400562216}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9124465596569316, "res": {"Yes": 0.9124465596569316, "No": 0.08755315362970863}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9236365674349345, "res": {"Yes": 0.9236365674349345, "No": 0.07636326676018941}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9705339548690419, "res": {"Yes": 0.9705339548690419, "No": 0.029465985935792412}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863302345254877, "res": {"Yes": 0.9863302345254877, "No": 0.013669662741287026}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9448072467373121, "res": {"Yes": 0.9448072467373121, "No": 0.05519256647915299}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.854959268809021, "res": {"Yes": 0.854959268809021, "No": 0.14504058197932254}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9462319289081497, "res": {"Yes": 0.9462319289081497, "No": 0.053767993779820916}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9579756058093909, "res": {"Yes": 0.9579756058093909, "No": 0.04202424774609822}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.870259035235288, "res": {"Yes": 0.870259035235288, "No": 0.12974088628255553}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9301974718985645, "res": {"Yes": 0.9301974718985645, "No": 0.06980207792205287}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946498184755468, "res": {"Yes": 0.9946498184755468, "No": 0.005350153643903257}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8351711208315596, "res": {"Yes": 0.8351711208315596, "No": 0.16482863838436734}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3566985684292605, "res": {"No": 0.6433010296444058, "Yes": 0.3566985684292605}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8080593905551116, "res": {"Yes": 0.8080593905551116, "No": 0.19194043297733063}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9803833669964427, "res": {"Yes": 0.9803833669964427, "No": 0.019616596712658762}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7320442470924889, "res": {"Yes": 0.7320442470924889, "No": 0.26795546169157314}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9533199856507606, "res": {"Yes": 0.9533199856507606, "No": 0.046679951992622024}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7803348551091523, "res": {"Yes": 0.7803348551091523, "No": 0.21966487268877907}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8755362965802029, "res": {"Yes": 0.8755362965802029, "No": 0.12446343512362318}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994843926711681, "res": {"Yes": 0.9994843926711681, "No": 0.0005154934297403629}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984593073873385, "res": {"Yes": 0.9984593073873385, "No": 0.0015406883221183991}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8684012600302096, "res": {"Yes": 0.8684012600302096, "No": 0.13159853399613067}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8052782492862104, "res": {"Yes": 0.8052782492862104, "No": 0.19472166481305825}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8660000609834639, "res": {"Yes": 0.8660000609834639, "No": 0.1339997559904064}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962252376947909, "res": {"Yes": 0.9962252376947909, "No": 0.003774769340508485}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9244925283644506, "res": {"Yes": 0.9244925283644506, "No": 0.07550725006398033}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5215651939429777, "res": {"Yes": 0.5215651939429777, "No": 0.478434415585641}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8663020878600546, "res": {"Yes": 0.8663020878600546, "No": 0.1336972050139007}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.95195869760559, "res": {"Yes": 0.95195869760559, "No": 0.04804106562406649}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9617355212990915, "res": {"Yes": 0.9617355212990915, "No": 0.03826378850030939}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9813582504895175, "res": {"Yes": 0.9813582504895175, "No": 0.01864158203880091}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8411461524347421, "res": {"Yes": 0.8411461524347421, "No": 0.1588536088376444}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9475180159604913, "res": {"Yes": 0.9475180159604913, "No": 0.05248161821144212}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9682219798190752, "res": {"Yes": 0.9682219798190752, "No": 0.031777766512510006}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9494419737735751, "res": {"Yes": 0.9494419737735751, "No": 0.05055774149194225}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8124634097062559, "res": {"Yes": 0.8124634097062559, "No": 0.18753623479546516}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9562743386330608, "res": {"Yes": 0.9562743386330608, "No": 0.043725492262208385}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9744193157484736, "res": {"Yes": 0.9744193157484736, "No": 0.025580419076261067}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963411957956552, "res": {"Yes": 0.9963411957956552, "No": 0.0036586816550353302}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909757234256368, "res": {"Yes": 0.9909757234256368, "No": 0.009024131609019045}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9618742510738295, "res": {"Yes": 0.9618742510738295, "No": 0.0381254896827249}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9832293437767191, "res": {"Yes": 0.9832293437767191, "No": 0.01677044781775457}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9997314288216976, "res": {"Yes": 0.9997314288216976, "No": 0.000268446209806896}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9420048460427041, "res": {"Yes": 0.9420048460427041, "No": 0.05799492919094924}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992823460368755, "res": {"Yes": 0.9992823460368755, "No": 0.0007175452113365081}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983651945526011, "res": {"Yes": 0.9983651945526011, "No": 0.0016348094921245398}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.974499350042358, "res": {"Yes": 0.974499350042358, "No": 0.0255003780866557}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2660941727395049, "res": {"No": 0.7339056653123653, "Yes": 0.2660941727395049}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.974420788097172, "res": {"Yes": 0.974420788097172, "No": 0.025579091486962253}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9851418135755929, "res": {"Yes": 0.9851418135755929, "No": 0.014858152120734705}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9440017519114103, "res": {"Yes": 0.9440017519114103, "No": 0.05599811777346396}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9723713987722927, "res": {"Yes": 0.9723713987722927, "No": 0.027628492549616995}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0937813471894513, "res": {"No": 0.9062182988539852, "Yes": 0.0937813471894513}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7388313336578659, "res": {"Yes": 0.7388313336578659, "No": 0.2611680523825906}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9606955553955643, "res": {"Yes": 0.9606955553955643, "No": 0.03930431380893975}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7076334962392427, "res": {"Yes": 0.7076334962392427, "No": 0.29236630316534334}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9138512928991582, "res": {"Yes": 0.9138512928991582, "No": 0.0861485109969743}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7172476073445363, "res": {"Yes": 0.7172476073445363, "No": 0.28275199734906986}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.574633082074569, "res": {"Yes": 0.574633082074569, "No": 0.4253662144639918}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8798455308875052, "res": {"Yes": 0.8798455308875052, "No": 0.12015418331130337}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9656030119030734, "res": {"Yes": 0.9656030119030734, "No": 0.03439680145104726}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.769619224563851, "res": {"Yes": 0.769619224563851, "No": 0.2303801058558968}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7891276690575406, "res": {"Yes": 0.7891276690575406, "No": 0.21087228776765474}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987007293838814, "res": {"Yes": 0.9987007293838814, "No": 0.0012991845247452393}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9841784664811367, "res": {"Yes": 0.9841784664811367, "No": 0.015821466845285834}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954616525633412, "res": {"Yes": 0.9954616525633412, "No": 0.004538294492285009}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9918734057620453, "res": {"Yes": 0.9918734057620453, "No": 0.008126554061079433}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9626916680583785, "res": {"Yes": 0.9626916680583785, "No": 0.03730810439231448}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9575539546605266, "res": {"Yes": 0.9575539546605266, "No": 0.04244598298631752}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.975193816096068, "res": {"Yes": 0.975193816096068, "No": 0.024805903742711473}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9757379524124449, "res": {"Yes": 0.9757379524124449, "No": 0.024261913594277644}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8559623478194904, "res": {"Yes": 0.8559623478194904, "No": 0.1440372978639645}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8754699684629148, "res": {"Yes": 0.8754699684629148, "No": 0.12452997861185977}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9089558825780668, "res": {"Yes": 0.9089558825780668, "No": 0.09104406125790161}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982471081126773, "res": {"Yes": 0.9982471081126773, "No": 0.0017528451696823943}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9779911842962113, "res": {"Yes": 0.9779911842962113, "No": 0.022008764909252153}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9130715531412005, "res": {"Yes": 0.9130715531412005, "No": 0.08692832938585195}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9977816686418705, "res": {"Yes": 0.9977816686418705, "No": 0.002218321611254341}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.996973042501691, "res": {"Yes": 0.996973042501691, "No": 0.0030269129718001973}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989666539700978, "res": {"Yes": 0.9989666539700978, "No": 0.0010333054917395325}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995043977513863, "res": {"Yes": 0.9995043977513863, "No": 0.0004955169484680943}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948070477381774, "res": {"Yes": 0.9948070477381774, "No": 0.005192956971603192}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9923865357683387, "res": {"Yes": 0.9923865357683387, "No": 0.007613441312958418}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9598570297065983, "res": {"Yes": 0.9598570297065983, "No": 0.040142723013901364}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9886573405708959, "res": {"Yes": 0.9886573405708959, "No": 0.011342528882875439}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9733579681159857, "res": {"Yes": 0.9733579681159857, "No": 0.026641903051346852}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9698745167773823, "res": {"Yes": 0.9698745167773823, "No": 0.030125285802391683}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7117384148142623, "res": {"Yes": 0.7117384148142623, "No": 0.28826133416468047}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9475220884016756, "res": {"Yes": 0.9475220884016756, "No": 0.05247783967096}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903789106982461, "res": {"Yes": 0.9903789106982461, "No": 0.00962099104865472}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8989558192906048, "res": {"Yes": 0.8989558192906048, "No": 0.10104412261107028}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754599652469967, "res": {"Yes": 0.9754599652469967, "No": 0.0245399764342339}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.931156381755813, "res": {"Yes": 0.931156381755813, "No": 0.06884349768670874}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9174118224439873, "res": {"Yes": 0.9174118224439873, "No": 0.08258800722310584}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9499349256970457, "res": {"Yes": 0.9499349256970457, "No": 0.05006490576886567}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9535369259003664, "res": {"Yes": 0.9535369259003664, "No": 0.0464629666419}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.773234200666574, "res": {"Yes": 0.773234200666574, "No": 0.2267655079598161}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9455789439259705, "res": {"Yes": 0.9455789439259705, "No": 0.054420900420472325}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971409671411633, "res": {"Yes": 0.9971409671411633, "No": 0.002859023917470414}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932907780080389, "res": {"Yes": 0.9932907780080389, "No": 0.00670915008470162}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902766147920843, "res": {"Yes": 0.9902766147920843, "No": 0.009723304977904306}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9749335803721274, "res": {"Yes": 0.9749335803721274, "No": 0.025066354255087794}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7346469979765862, "res": {"Yes": 0.7346469979765862, "No": 0.26535288429275533}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9532986458217287, "res": {"Yes": 0.9532986458217287, "No": 0.046701172852877865}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7938454532042937, "res": {"Yes": 0.7938454532042937, "No": 0.206154539437774}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.15793524053602595, "res": {"No": 0.8420646383918632, "Yes": 0.15793524053602595}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9514069823349572, "res": {"Yes": 0.9514069823349572, "No": 0.04859290725409104}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.680852547344821, "res": {"Yes": 0.680852547344821, "No": 0.31914689381755046}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9723847038211684, "res": {"Yes": 0.9723847038211684, "No": 0.02761496618901515}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9604934847330687, "res": {"Yes": 0.9604934847330687, "No": 0.039506016587637714}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9362672778580238, "res": {"Yes": 0.9362672778580238, "No": 0.06373250270431254}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9058855675941828, "res": {"Yes": 0.9058855675941828, "No": 0.09411416616418111}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.653700293095255, "res": {"Yes": 0.653700293095255, "No": 0.3462993515794004}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994581963686248, "res": {"Yes": 0.9994581963686248, "No": 0.0005415719113843336}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.926754816287128, "res": {"Yes": 0.926754816287128, "No": 0.07324493355219572}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881358437614318, "res": {"Yes": 0.9881358437614318, "No": 0.011863972406106234}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.975012678967121, "res": {"Yes": 0.975012678967121, "No": 0.024986949760209414}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2747783517333587, "res": {"No": 0.7252215025723402, "Yes": 0.2747783517333587}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8783822725093701, "res": {"Yes": 0.8783822725093701, "No": 0.12161767091297276}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5452052771034398, "res": {"Yes": 0.5452052771034398, "No": 0.4547946548151086}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8940045681805802, "res": {"Yes": 0.8940045681805802, "No": 0.1059952497026708}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6195880668911804, "res": {"Yes": 0.6195880668911804, "No": 0.38041177291964207}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.999312703296656, "res": {"Yes": 0.999312703296656, "No": 0.0006871905079741496}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981094479491226, "res": {"Yes": 0.9981094479491226, "No": 0.0018905887320065141}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894030544690453, "res": {"Yes": 0.9894030544690453, "No": 0.010596812824610734}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987780189276646, "res": {"Yes": 0.9987780189276646, "No": 0.0012219401457462962}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9875410583453141, "res": {"Yes": 0.9875410583453141, "No": 0.012458844583234712}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5265946665800427, "res": {"Yes": 0.5265946665800427, "No": 0.4734051041800431}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.30496102421485066, "res": {"No": 0.6950389165145788, "Yes": 0.30496102421485066}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9743611378312199, "res": {"Yes": 0.9743611378312199, "No": 0.025638770512755767}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.977517880713395, "res": {"Yes": 0.977517880713395, "No": 0.022482105439535254}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8857869323257249, "res": {"Yes": 0.8857869323257249, "No": 0.11421290324591996}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8844168126027825, "res": {"Yes": 0.8844168126027825, "No": 0.1155830915858786}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6975855433576248, "res": {"Yes": 0.6975855433576248, "No": 0.30241440506933964}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368983847371139, "res": {"Yes": 0.8368983847371139, "No": 0.16310144659824324}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5121788851012316, "res": {"Yes": 0.5121788851012316, "No": 0.4878211383931085}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9713764963584239, "res": {"Yes": 0.9713764963584239, "No": 0.028623380360531248}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.28903762761067725, "res": {"No": 0.710962281866693, "Yes": 0.28903762761067725}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.425925981566816, "res": {"No": 0.5740738284158483, "Yes": 0.425925981566816}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9803268867768881, "res": {"Yes": 0.9803268867768881, "No": 0.019673095250758776}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9507554593873843, "res": {"Yes": 0.9507554593873843, "No": 0.04924442577515433}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6534065012415733, "res": {"Yes": 0.6534065012415733, "No": 0.34659293002154684}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.044763482263749876, "res": {"No": 0.9552362881104736, "Yes": 0.044763482263749876}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8566293080431892, "res": {"Yes": 0.8566293080431892, "No": 0.14337025487251076}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8614707982416212, "res": {"Yes": 0.8614707982416212, "No": 0.13852913700405786}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9137606104518122, "res": {"Yes": 0.9137606104518122, "No": 0.08623926453301796}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5170362969869975, "res": {"Yes": 0.5170362969869975, "No": 0.48296325733179046}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9504935718412341, "res": {"Yes": 0.9504935718412341, "No": 0.049506198253702616}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984965065803889, "res": {"Yes": 0.9984965065803889, "No": 0.0015035000834979126}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995380388382117, "res": {"Yes": 0.995380388382117, "No": 0.004619560538267298}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9930352632844234, "res": {"Yes": 0.9930352632844234, "No": 0.0069645450036035315}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9274967634949041, "res": {"Yes": 0.9274967634949041, "No": 0.07250293030052607}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5143152061221039, "res": {"Yes": 0.5143152061221039, "No": 0.485684209970175}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.63588593449827, "res": {"Yes": 0.63588593449827, "No": 0.364113917551842}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8643053139259451, "res": {"Yes": 0.8643053139259451, "No": 0.135694438061563}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6843916551371038, "res": {"Yes": 0.6843916551371038, "No": 0.3156082025538796}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8868540266012819, "res": {"Yes": 0.8868540266012819, "No": 0.11314591613972817}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6865864727115953, "res": {"Yes": 0.6865864727115953, "No": 0.3134130530872944}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8085433933311316, "res": {"Yes": 0.8085433933311316, "No": 0.191456318156966}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9930084567586159, "res": {"Yes": 0.9930084567586159, "No": 0.00699150526798096}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8117247127572119, "res": {"Yes": 0.8117247127572119, "No": 0.1882749282662449}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7512419414841771, "res": {"Yes": 0.7512419414841771, "No": 0.24875793598376936}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4469518224192221, "res": {"No": 0.5530477964507038, "Yes": 0.4469518224192221}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9541621942954612, "res": {"Yes": 0.9541621942954612, "No": 0.04583727748627843}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993830339302709, "res": {"Yes": 0.993830339302709, "No": 0.006169672635054962}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9878869380050885, "res": {"Yes": 0.9878869380050885, "No": 0.01211293123405792}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874787425205779, "res": {"Yes": 0.9874787425205779, "No": 0.012521183362564392}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8931910582339692, "res": {"Yes": 0.8931910582339692, "No": 0.10680879560447022}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.27725811044402376, "res": {"No": 0.7227417737682417, "Yes": 0.27725811044402376}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6976166633428401, "res": {"Yes": 0.6976166633428401, "No": 0.3023829253683188}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29666539246354123, "res": {"No": 0.7033345790644645, "Yes": 0.29666539246354123}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8670117585085185, "res": {"Yes": 0.8670117585085185, "No": 0.1329879441975639}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9784218015477131, "res": {"Yes": 0.9784218015477131, "No": 0.02157817266467461}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949644498060253, "res": {"Yes": 0.9949644498060253, "No": 0.0050355022804579935}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967994939735602, "res": {"Yes": 0.9967994939735602, "No": 0.0032004911390510495}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9832280744284556, "res": {"Yes": 0.9832280744284556, "No": 0.016771890138285783}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997536065482047, "res": {"Yes": 0.997536065482047, "No": 0.0024639345405474927}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9490991855260944, "res": {"Yes": 0.9490991855260944, "No": 0.05090071542619389}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9849591812736915, "res": {"Yes": 0.9849591812736915, "No": 0.015040831140289451}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9862853496304936, "res": {"Yes": 0.9862853496304936, "No": 0.013714528960826164}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489195627263523, "res": {"Yes": 0.9489195627263523, "No": 0.051080319139530095}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9693806583081506, "res": {"Yes": 0.9693806583081506, "No": 0.03061924947480279}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7697190814396214, "res": {"Yes": 0.7697190814396214, "No": 0.2302808103969363}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.765583018294839, "res": {"Yes": 0.765583018294839, "No": 0.2344169058374911}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7077014464697899, "res": {"Yes": 0.7077014464697899, "No": 0.2922984820505625}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6594419512707885, "res": {"Yes": 0.6594419512707885, "No": 0.3405579762153538}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6291481232132177, "res": {"Yes": 0.6291481232132177, "No": 0.3708517099189626}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8724152014011672, "res": {"Yes": 0.8724152014011672, "No": 0.12758465167307514}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7517839250730372, "res": {"Yes": 0.7517839250730372, "No": 0.24821578756381485}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8805649225792694, "res": {"Yes": 0.8805649225792694, "No": 0.11943456867921712}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9017429476198149, "res": {"Yes": 0.9017429476198149, "No": 0.09825695912031748}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8812826199452567, "res": {"Yes": 0.8812826199452567, "No": 0.11871712695889183}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9982361752703934, "res": {"Yes": 0.9982361752703934, "No": 0.0017636576785649643}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993478228100534, "res": {"Yes": 0.9993478228100534, "No": 0.0006520906317151074}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999862172649827, "res": {"Yes": 0.9999862172649827, "No": 1.3763863324558295e-05}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987915697406046, "res": {"Yes": 0.9987915697406046, "No": 0.0012083376583812671}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995628701191182, "res": {"Yes": 0.9995628701191182, "No": 0.00043704254964495906}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9057508778596207, "res": {"Yes": 0.9057508778596207, "No": 0.09424891166892475}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968817039161288, "res": {"Yes": 0.9968817039161288, "No": 0.003118283776937388}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9572855860709711, "res": {"Yes": 0.9572855860709711, "No": 0.04271407020616972}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934289945990761, "res": {"Yes": 0.9934289945990761, "No": 0.006570942511008841}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9599400695701772, "res": {"Yes": 0.9599400695701772, "No": 0.040059609366607844}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9660919866768576, "res": {"Yes": 0.9660919866768576, "No": 0.033907823822491225}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955113810967925, "res": {"Yes": 0.9955113810967925, "No": 0.004488553901676708}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.982081260550345, "res": {"Yes": 0.982081260550345, "No": 0.017918546235093254}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9951293499729444, "res": {"Yes": 0.9951293499729444, "No": 0.004870690331916117}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9918525310140134, "res": {"Yes": 0.9918525310140134, "No": 0.00814736676707958}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9706757893587514, "res": {"Yes": 0.9706757893587514, "No": 0.029323946795192516}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9683220003790195, "res": {"Yes": 0.9683220003790195, "No": 0.031677678322374246}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9896051082332429, "res": {"Yes": 0.9896051082332429, "No": 0.010394685635755688}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9935042946020609, "res": {"Yes": 0.9935042946020609, "No": 0.006495501620812485}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9711884638411237, "res": {"Yes": 0.9711884638411237, "No": 0.02881139241765444}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3456321568291984, "res": {"No": 0.654367623506033, "Yes": 0.3456321568291984}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9718251544067901, "res": {"Yes": 0.9718251544067901, "No": 0.02817456382778736}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9327240073068037, "res": {"Yes": 0.9327240073068037, "No": 0.06727567829480516}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9477698144237157, "res": {"Yes": 0.9477698144237157, "No": 0.05222989222527834}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8294501977772775, "res": {"Yes": 0.8294501977772775, "No": 0.17054950030926863}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8782174683221688, "res": {"Yes": 0.8782174683221688, "No": 0.12178239188032125}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9102942773905425, "res": {"Yes": 0.9102942773905425, "No": 0.08970565216765902}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993235361049371, "res": {"Yes": 0.9993235361049371, "No": 0.0006764279635897306}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8454617002615643, "res": {"Yes": 0.8454617002615643, "No": 0.154537854455203}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9515191113034321, "res": {"Yes": 0.9515191113034321, "No": 0.04848068052925423}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9955701090408678, "res": {"Yes": 0.9955701090408678, "No": 0.0044299441598700945}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9625860675373258, "res": {"Yes": 0.9625860675373258, "No": 0.03741389198801155}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997551130684728, "res": {"Yes": 0.997551130684728, "No": 0.002448876331101251}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9779112602546641, "res": {"Yes": 0.9779112602546641, "No": 0.02208872459446228}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836336698338686, "res": {"Yes": 0.9836336698338686, "No": 0.0163663267811705}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.955511342140696, "res": {"Yes": 0.955511342140696, "No": 0.0444886401467473}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994410478220037, "res": {"Yes": 0.9994410478220037, "No": 0.0005589262642718668}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9649434523125143, "res": {"Yes": 0.9649434523125143, "No": 0.03505633990914704}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980605142379134, "res": {"Yes": 0.9980605142379134, "No": 0.001939460184576592}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.881593538410082, "res": {"Yes": 0.881593538410082, "No": 0.11840615181322828}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8564631980288584, "res": {"Yes": 0.8564631980288584, "No": 0.143536158487545}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9487447375566869, "res": {"Yes": 0.9487447375566869, "No": 0.051254969490304884}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986631835315869, "res": {"Yes": 0.986631835315869, "No": 0.013367842093959765}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9620560509432473, "res": {"Yes": 0.9620560509432473, "No": 0.0379436277866234}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888501079963717, "res": {"Yes": 0.9888501079963717, "No": 0.011149615120640357}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9308435229487689, "res": {"Yes": 0.9308435229487689, "No": 0.069156169898493}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9880458761125149, "res": {"Yes": 0.9880458761125149, "No": 0.01195401946806207}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9263992608844744, "res": {"Yes": 0.9263992608844744, "No": 0.07360048232058215}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9809895243657878, "res": {"Yes": 0.9809895243657878, "No": 0.019010523209578984}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846148849586156, "res": {"Yes": 0.9846148849586156, "No": 0.015384911847404653}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8903144268964454, "res": {"Yes": 0.8903144268964454, "No": 0.10968523665710338}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972770579501495, "res": {"Yes": 0.9972770579501495, "No": 0.002722908958808048}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997543062922828, "res": {"Yes": 0.997543062922828, "No": 0.002456950333335682}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9684355931485072, "res": {"Yes": 0.9684355931485072, "No": 0.03156426350757809}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9698209055356187, "res": {"Yes": 0.9698209055356187, "No": 0.030179059172553587}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.212486568430759, "res": {"No": 0.7875132051330631, "Yes": 0.212486568430759}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8961870917928517, "res": {"Yes": 0.8961870917928517, "No": 0.10381284171068261}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.755732501055124, "res": {"Yes": 0.755732501055124, "No": 0.2442671196084036}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2801229402095235, "res": {"No": 0.7198767659673382, "Yes": 0.2801229402095235}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.863619448785902, "res": {"Yes": 0.863619448785902, "No": 0.13638041894495123}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9010467913827529, "res": {"Yes": 0.9010467913827529, "No": 0.09895303849772721}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976660884167877, "res": {"Yes": 0.9976660884167877, "No": 0.0023339387219093104}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931702365741305, "res": {"Yes": 0.9931702365741305, "No": 0.006829669472876092}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997985063825662, "res": {"Yes": 0.9997985063825662, "No": 0.00020143737881294345}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998874704320568, "res": {"Yes": 0.998874704320568, "No": 0.0011253063154357476}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.09438055615314467, "res": {"No": 0.9056191640872527, "Yes": 0.09438055615314467}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9497376950374157, "res": {"Yes": 0.9497376950374157, "No": 0.050262108293737735}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.98330852642402, "res": {"Yes": 0.98330852642402, "No": 0.01669143942950534}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9859502293363764, "res": {"Yes": 0.9859502293363764, "No": 0.014049562815795388}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9677959561751092, "res": {"Yes": 0.9677959561751092, "No": 0.03220373626785186}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.89402343187598, "res": {"Yes": 0.89402343187598, "No": 0.10597645841079578}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9884648914476836, "res": {"Yes": 0.9884648914476836, "No": 0.011535043687239035}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8226797822373553, "res": {"Yes": 0.8226797822373553, "No": 0.17732004771775542}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9208435425072465, "res": {"Yes": 0.9208435425072465, "No": 0.07915635541879573}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9767429850525559, "res": {"Yes": 0.9767429850525559, "No": 0.023256909693105208}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9988041754686748, "res": {"Yes": 0.9988041754686748, "No": 0.0011957753207572622}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9814501069763674, "res": {"Yes": 0.9814501069763674, "No": 0.018549684465860885}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.856964812631789, "res": {"Yes": 0.856964812631789, "No": 0.1430346600670207}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9789128889819237, "res": {"Yes": 0.9789128889819237, "No": 0.021087043062233867}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9504198030959299, "res": {"Yes": 0.9504198030959299, "No": 0.04958002242876215}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9957833063237324, "res": {"Yes": 0.9957833063237324, "No": 0.00421666393627771}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9837733567162225, "res": {"Yes": 0.9837733567162225, "No": 0.016226470317576357}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996924714366056, "res": {"Yes": 0.9996924714366056, "No": 0.00030748354018623797}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989770030185493, "res": {"Yes": 0.9989770030185493, "No": 0.001022911506156706}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972957936710447, "res": {"Yes": 0.9972957936710447, "No": 0.0027041517549053016}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.916413332253304, "res": {"Yes": 0.916413332253304, "No": 0.08358655476463009}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9175265281100078, "res": {"Yes": 0.9175265281100078, "No": 0.08247333087691142}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6859458981752883, "res": {"Yes": 0.6859458981752883, "No": 0.3140537278212974}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5680329791856207, "res": {"Yes": 0.5680329791856207, "No": 0.4319669492631874}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8924653407285411, "res": {"Yes": 0.8924653407285411, "No": 0.10753443038960207}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9578972935467369, "res": {"Yes": 0.9578972935467369, "No": 0.04210248137529232}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993747310221709, "res": {"Yes": 0.9993747310221709, "No": 0.0006252081022799791}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9927336968665311, "res": {"Yes": 0.9927336968665311, "No": 0.007266147829604407}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9926107040861789, "res": {"Yes": 0.9926107040861789, "No": 0.007389263579017763}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9940301969118792, "res": {"Yes": 0.9940301969118792, "No": 0.0059697722582624495}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9062033101272793, "res": {"Yes": 0.9062033101272793, "No": 0.09379659727630281}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955429308456534, "res": {"Yes": 0.9955429308456534, "No": 0.004457064114485113}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8568083965514212, "res": {"Yes": 0.8568083965514212, "No": 0.14319141175958552}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8093858490114815, "res": {"Yes": 0.8093858490114815, "No": 0.19061389291374947}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920278891797183, "res": {"Yes": 0.9920278891797183, "No": 0.007971979883732406}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9375594123239663, "res": {"Yes": 0.9375594123239663, "No": 0.06244036111247574}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989570158864183, "res": {"Yes": 0.9989570158864183, "No": 0.0010429901989369827}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998579583547737, "res": {"Yes": 0.998579583547737, "No": 0.0014203975240963}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9809938750639761, "res": {"Yes": 0.9809938750639761, "No": 0.019006109478543722}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.999553222594762, "res": {"Yes": 0.999553222594762, "No": 0.000446715335298316}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8059591017569293, "res": {"Yes": 0.8059591017569293, "No": 0.19404061231458386}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9153230187951554, "res": {"Yes": 0.9153230187951554, "No": 0.0846767426529973}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8630386528221689, "res": {"Yes": 0.8630386528221689, "No": 0.13696102262780682}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9313872477625966, "res": {"Yes": 0.9313872477625966, "No": 0.06861251681734638}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7763515756465909, "res": {"Yes": 0.7763515756465909, "No": 0.22364768503392957}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7228082690701653, "res": {"Yes": 0.7228082690701653, "No": 0.277191576481912}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5488007971712483, "res": {"Yes": 0.5488007971712483, "No": 0.4511987992673852}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.878237061572449, "res": {"Yes": 0.878237061572449, "No": 0.12176282302234594}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6673631422943156, "res": {"Yes": 0.6673631422943156, "No": 0.33263609312370995}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.690545067735374, "res": {"Yes": 0.690545067735374, "No": 0.3094546884495762}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.939241316230622, "res": {"Yes": 0.939241316230622, "No": 0.06075841426092035}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9337150714329576, "res": {"Yes": 0.9337150714329576, "No": 0.06628457920634646}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968013912835226, "res": {"Yes": 0.9968013912835226, "No": 0.003198556387090475}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9684344746060431, "res": {"Yes": 0.9684344746060431, "No": 0.031565308301991446}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9906268652659056, "res": {"Yes": 0.9906268652659056, "No": 0.009373082138000224}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9698562289959117, "res": {"Yes": 0.9698562289959117, "No": 0.030143685918886505}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9103882246075622, "res": {"Yes": 0.9103882246075622, "No": 0.08961161819969803}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9531105089752847, "res": {"Yes": 0.9531105089752847, "No": 0.04688936445165046}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.938104001802638, "res": {"Yes": 0.938104001802638, "No": 0.061895908501376176}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9641034855284093, "res": {"Yes": 0.9641034855284093, "No": 0.03589625882216539}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.830317924472534, "res": {"Yes": 0.830317924472534, "No": 0.1696819290563439}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9215830657929174, "res": {"Yes": 0.9215830657929174, "No": 0.0784168461771102}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9598151837318603, "res": {"Yes": 0.9598151837318603, "No": 0.040184641845447996}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989533899801334, "res": {"Yes": 0.989533899801334, "No": 0.01046603109486837}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8700860884391922, "res": {"Yes": 0.8700860884391922, "No": 0.12991368637514875}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8103329227651886, "res": {"Yes": 0.8103329227651886, "No": 0.1896670333920572}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9133925816065385, "res": {"Yes": 0.9133925816065385, "No": 0.08660731484054439}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9856908402387596, "res": {"Yes": 0.9856908402387596, "No": 0.014309079687866221}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987216580983622, "res": {"Yes": 0.987216580983622, "No": 0.012783315945927907}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.740781143657633, "res": {"Yes": 0.740781143657633, "No": 0.25921858906598616}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.689155116848646, "res": {"Yes": 0.689155116848646, "No": 0.3108446528872218}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949629185569153, "res": {"Yes": 0.9949629185569153, "No": 0.005037050431797674}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9652366929986546, "res": {"Yes": 0.9652366929986546, "No": 0.03476305131753092}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9791531848073589, "res": {"Yes": 0.9791531848073589, "No": 0.020846566928541685}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.979897223300857, "res": {"Yes": 0.979897223300857, "No": 0.020102638577905098}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9917605945129373, "res": {"Yes": 0.9917605945129373, "No": 0.008239337819509036}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957810584450338, "res": {"Yes": 0.9957810584450338, "No": 0.004218876578503995}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998280596834308, "res": {"Yes": 0.9998280596834308, "No": 0.00017181607236930625}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998741809658787, "res": {"Yes": 0.9998741809658787, "No": 0.0001257166491081661}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999313858390593, "res": {"Yes": 0.9999313858390593, "No": 6.858189358368681e-05}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9999391335724361, "res": {"Yes": 0.9999391335724361, "No": 6.076017500343399e-05}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994167551134628, "res": {"Yes": 0.9994167551134628, "No": 0.0005832227319867748}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894045732038995, "res": {"Yes": 0.9894045732038995, "No": 0.010595170445910563}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9935217131700985, "res": {"Yes": 0.9935217131700985, "No": 0.0064782685720005916}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981017253067672, "res": {"Yes": 0.9981017253067672, "No": 0.0018982129272945136}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1402253510660298, "res": {"No": 0.8597744599515146, "Yes": 0.1402253510660298}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.38416989090629955, "res": {"No": 0.6158297398503265, "Yes": 0.38416989090629955}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8758503990237584, "res": {"Yes": 0.8758503990237584, "No": 0.12414951172248158}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9715482751943412, "res": {"Yes": 0.9715482751943412, "No": 0.028451630755494436}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.967363934639402, "res": {"Yes": 0.967363934639402, "No": 0.03263595774997032}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8300972968013057, "res": {"Yes": 0.8300972968013057, "No": 0.16990237017322446}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9123488236162969, "res": {"Yes": 0.9123488236162969, "No": 0.0876510226724115}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9101905825730311, "res": {"Yes": 0.9101905825730311, "No": 0.08980911549254386}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9042620384959589, "res": {"Yes": 0.9042620384959589, "No": 0.09573777838077811}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9645301667875181, "res": {"Yes": 0.9645301667875181, "No": 0.03546979818324778}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.10794699061414956, "res": {"No": 0.8920528671973067, "Yes": 0.10794699061414956}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9704890454198922, "res": {"Yes": 0.9704890454198922, "No": 0.029510740415993792}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.864404515939423, "res": {"Yes": 0.864404515939423, "No": 0.13559513251868752}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9418189267656099, "res": {"Yes": 0.9418189267656099, "No": 0.05818089748666169}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9129329365756411, "res": {"Yes": 0.9129329365756411, "No": 0.08706677726299356}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9660918707458261, "res": {"Yes": 0.9660918707458261, "No": 0.03390795267246657}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.989405510170474, "res": {"Yes": 0.989405510170474, "No": 0.010594371600175805}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980626519860183, "res": {"Yes": 0.9980626519860183, "No": 0.0019373343445096393}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9874563289824524, "res": {"Yes": 0.9874563289824524, "No": 0.012543514749438446}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976694107501607, "res": {"Yes": 0.9976694107501607, "No": 0.0023305483400853583}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5465433508455712, "res": {"Yes": 0.5465433508455712, "No": 0.4534565610556738}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9169443367869281, "res": {"Yes": 0.9169443367869281, "No": 0.08305540199042001}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3401682945042464, "res": {"No": 0.6598316646856064, "Yes": 0.3401682945042464}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6740320144593719, "res": {"Yes": 0.6740320144593719, "No": 0.32596795499770587}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6079103612741728, "res": {"Yes": 0.6079103612741728, "No": 0.39208937411002115}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8807770145676059, "res": {"Yes": 0.8807770145676059, "No": 0.11922295111187887}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9581403927429182, "res": {"Yes": 0.9581403927429182, "No": 0.04185947934706286}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7509167917206593, "res": {"Yes": 0.7509167917206593, "No": 0.24908317553565415}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6935661763912983, "res": {"Yes": 0.6935661763912983, "No": 0.3064337797397746}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7132771246204986, "res": {"Yes": 0.7132771246204986, "No": 0.28672282397526117}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9265933803928644, "res": {"Yes": 0.9265933803928644, "No": 0.0734064112827338}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934155888619606, "res": {"Yes": 0.9934155888619606, "No": 0.006584279928709543}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889291459886567, "res": {"Yes": 0.9889291459886567, "No": 0.011070570476834669}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971033982695533, "res": {"Yes": 0.9971033982695533, "No": 0.0028965801398126555}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8551742069337879, "res": {"Yes": 0.8551742069337879, "No": 0.14482545793729423}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7234451122429315, "res": {"Yes": 0.7234451122429315, "No": 0.27655457487767793}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.562040144581435, "res": {"Yes": 0.562040144581435, "No": 0.4379594731243257}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939233718414208, "res": {"Yes": 0.9939233718414208, "No": 0.006076630078921775}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9757542961600263, "res": {"Yes": 0.9757542961600263, "No": 0.024245571423193547}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8804023676891665, "res": {"Yes": 0.8804023676891665, "No": 0.11959712215551072}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9882118580467012, "res": {"Yes": 0.9882118580467012, "No": 0.01178787992930116}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9905097721662232, "res": {"Yes": 0.9905097721662232, "No": 0.009490196125785848}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9545522944941767, "res": {"Yes": 0.9545522944941767, "No": 0.0454476393137101}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978065950359737, "res": {"Yes": 0.9978065950359737, "No": 0.002193351587015848}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9714631178625666, "res": {"Yes": 0.9714631178625666, "No": 0.02853674568183403}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9985466556293027, "res": {"Yes": 0.9985466556293027, "No": 0.0014532792852921428}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997939775555391, "res": {"Yes": 0.9997939775555391, "No": 0.00020596176771825936}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982247739810276, "res": {"Yes": 0.9982247739810276, "No": 0.0017751728154093726}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963485336763149, "res": {"Yes": 0.9963485336763149, "No": 0.003651485163635498}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9927655406011239, "res": {"Yes": 0.9927655406011239, "No": 0.007234435105286109}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7837774450817063, "res": {"Yes": 0.7837774450817063, "No": 0.21622211272135022}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6354802669844682, "res": {"Yes": 0.6354802669844682, "No": 0.3645193290484145}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8775705219296185, "res": {"Yes": 0.8775705219296185, "No": 0.12242929491496596}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9224625224160319, "res": {"Yes": 0.9224625224160319, "No": 0.07753728977153543}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9322707054308418, "res": {"Yes": 0.9322707054308418, "No": 0.06772902237566379}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7338430071215772, "res": {"Yes": 0.7338430071215772, "No": 0.2661567388338722}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7512355709795243, "res": {"Yes": 0.7512355709795243, "No": 0.24876435402131006}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4781680209460021, "res": {"No": 0.5218318862308623, "Yes": 0.4781680209460021}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3110546993854648, "res": {"No": 0.6889450737097285, "Yes": 0.3110546993854648}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.3573606796773079, "res": {"No": 0.6426391177308046, "Yes": 0.3573606796773079}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9348570958851229, "res": {"Yes": 0.9348570958851229, "No": 0.06514263034989727}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.29586364191642456, "res": {"No": 0.7041360772177532, "Yes": 0.29586364191642456}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9399581311874754, "res": {"Yes": 0.9399581311874754, "No": 0.06004162543790324}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701902623173922, "res": {"Yes": 0.9701902623173922, "No": 0.02980946853095692}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.791110353157677, "res": {"Yes": 0.791110353157677, "No": 0.20888899343662615}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9913723972263292, "res": {"Yes": 0.9913723972263292, "No": 0.008627487084574144}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9915610624383129, "res": {"Yes": 0.9915610624383129, "No": 0.008438919499128388}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938631955786836, "res": {"Yes": 0.9938631955786836, "No": 0.006136734614703119}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8822014656100442, "res": {"Yes": 0.8822014656100442, "No": 0.11779829282704628}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9875393153368843, "res": {"Yes": 0.9875393153368843, "No": 0.012460580221169542}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.969027255534846, "res": {"Yes": 0.969027255534846, "No": 0.030972669264381508}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904304810707492, "res": {"Yes": 0.9904304810707492, "No": 0.009569409295791545}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924506512692763, "res": {"Yes": 0.9924506512692763, "No": 0.007549208920142179}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961857174265368, "res": {"Yes": 0.9961857174265368, "No": 0.0038142089053811203}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9940274900719355, "res": {"Yes": 0.9940274900719355, "No": 0.005972510026428257}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8285445928227371, "res": {"Yes": 0.8285445928227371, "No": 0.1714553321904668}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9000265914848732, "res": {"Yes": 0.9000265914848732, "No": 0.09997329286639463}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8687144967400754, "res": {"Yes": 0.8687144967400754, "No": 0.13128539994631067}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6592028348211235, "res": {"Yes": 0.6592028348211235, "No": 0.34079702960933844}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9723702737392351, "res": {"Yes": 0.9723702737392351, "No": 0.027629617052147237}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8558463813373111, "res": {"Yes": 0.8558463813373111, "No": 0.14415299184955485}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970075277974568, "res": {"Yes": 0.9970075277974568, "No": 0.0029923118391082383}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950823653533095, "res": {"Yes": 0.9950823653533095, "No": 0.004917544295569463}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957851983138117, "res": {"Yes": 0.9957851983138117, "No": 0.004214695213951909}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958763377414688, "res": {"Yes": 0.9958763377414688, "No": 0.004123426856730999}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7613060528691105, "res": {"Yes": 0.7613060528691105, "No": 0.23869356608034759}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9690502081862478, "res": {"Yes": 0.9690502081862478, "No": 0.03094954441475806}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897685738187166, "res": {"Yes": 0.9897685738187166, "No": 0.010231243010471043}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9302281131079467, "res": {"Yes": 0.9302281131079467, "No": 0.06977162991817051}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8782447813101484, "res": {"Yes": 0.8782447813101484, "No": 0.12175460430917666}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9979187637817852, "res": {"Yes": 0.9979187637817852, "No": 0.002081163846848377}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.912851569576286, "res": {"Yes": 0.912851569576286, "No": 0.08714839706619514}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994222316724375, "res": {"Yes": 0.9994222316724375, "No": 0.0005777036713974506}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983504631838217, "res": {"Yes": 0.9983504631838217, "No": 0.0016494814717582158}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8788716589303542, "res": {"Yes": 0.8788716589303542, "No": 0.12112829570724831}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6812908987120645, "res": {"Yes": 0.6812908987120645, "No": 0.31870855961894906}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8861788202651786, "res": {"Yes": 0.8861788202651786, "No": 0.11382098179360524}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924648568133385, "res": {"Yes": 0.9924648568133385, "No": 0.0075349604180231814}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7533570903638077, "res": {"Yes": 0.7533570903638077, "No": 0.2466423419576212}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9563272048523108, "res": {"Yes": 0.9563272048523108, "No": 0.04367273432913474}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6878281047589656, "res": {"Yes": 0.6878281047589656, "No": 0.3121716756601718}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9161426051015362, "res": {"Yes": 0.9161426051015362, "No": 0.08385724417418845}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8563608225372711, "res": {"Yes": 0.8563608225372711, "No": 0.1436389384503506}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9485175233266719, "res": {"Yes": 0.9485175233266719, "No": 0.051482311311051}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9729419474885111, "res": {"Yes": 0.9729419474885111, "No": 0.027057939628283963}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8552503293777262, "res": {"Yes": 0.8552503293777262, "No": 0.14474941557725723}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964511423337021, "res": {"Yes": 0.9964511423337021, "No": 0.0035489020678240012}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8905380396352301, "res": {"Yes": 0.8905380396352301, "No": 0.10946156455211754}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9797507288386438, "res": {"Yes": 0.9797507288386438, "No": 0.02024931931638968}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.972328119484521, "res": {"Yes": 0.972328119484521, "No": 0.027671673675637755}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.18700734216198545, "res": {"No": 0.8129924630650432, "Yes": 0.18700734216198545}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.500930655326062, "res": {"Yes": 0.500930655326062, "No": 0.49906910791711195}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9780756775822911, "res": {"Yes": 0.9780756775822911, "No": 0.02192418968284576}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7669842594921306, "res": {"Yes": 0.7669842594921306, "No": 0.23301568735010247}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905500124430925, "res": {"Yes": 0.9905500124430925, "No": 0.009449867110087174}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8751496575935097, "res": {"Yes": 0.8751496575935097, "No": 0.12485012013776371}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.411279708499188, "res": {"No": 0.5887198765829854, "Yes": 0.411279708499188}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9666115157264734, "res": {"Yes": 0.9666115157264734, "No": 0.033388237890135355}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9862670364541417, "res": {"Yes": 0.9862670364541417, "No": 0.013732753954003168}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9732581388289527, "res": {"Yes": 0.9732581388289527, "No": 0.026741799857996126}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9980862839940404, "res": {"Yes": 0.9980862839940404, "No": 0.0019135751122781976}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.996119590920687, "res": {"Yes": 0.996119590920687, "No": 0.0038802410538338835}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9636116434817784, "res": {"Yes": 0.9636116434817784, "No": 0.036387935557721296}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9896073239615607, "res": {"Yes": 0.9896073239615607, "No": 0.01039248843146497}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961827668284301, "res": {"Yes": 0.9961827668284301, "No": 0.003817266837547652}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8698123589182201, "res": {"Yes": 0.8698123589182201, "No": 0.1301872864875908}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9246024847992788, "res": {"Yes": 0.9246024847992788, "No": 0.0753973613453758}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9665547202690593, "res": {"Yes": 0.9665547202690593, "No": 0.03344516657028629}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9648061679950076, "res": {"Yes": 0.9648061679950076, "No": 0.03519377002388904}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9503215005572752, "res": {"Yes": 0.9503215005572752, "No": 0.049678452062727405}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9846737302563536, "res": {"Yes": 0.9846737302563536, "No": 0.01532620761405254}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8192460687590145, "res": {"Yes": 0.8192460687590145, "No": 0.18075294949230955}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9860267164061326, "res": {"Yes": 0.9860267164061326, "No": 0.013973038885289814}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885042657538977, "res": {"Yes": 0.9885042657538977, "No": 0.011495610639435248}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871959098720497, "res": {"Yes": 0.9871959098720497, "No": 0.012803982802632903}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6730027655897588, "res": {"Yes": 0.6730027655897588, "No": 0.32699715775610927}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9501602446815787, "res": {"Yes": 0.9501602446815787, "No": 0.04983951677876903}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8842852477002695, "res": {"Yes": 0.8842852477002695, "No": 0.1157145611575396}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902768504779467, "res": {"Yes": 0.9902768504779467, "No": 0.009723045369127195}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9738925847682222, "res": {"Yes": 0.9738925847682222, "No": 0.026107271550207485}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9700575445169923, "res": {"Yes": 0.9700575445169923, "No": 0.029942377330098944}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.989110360121423, "res": {"Yes": 0.989110360121423, "No": 0.010889507151938578}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977924666935167, "res": {"Yes": 0.9977924666935167, "No": 0.002207547738776882}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998937505347577, "res": {"Yes": 0.998937505347577, "No": 0.0010624496994292533}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.999189987897066, "res": {"Yes": 0.999189987897066, "No": 0.0008099675279921106}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9916154570038745, "res": {"Yes": 0.9916154570038745, "No": 0.008384414610817991}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.945683310396476, "res": {"Yes": 0.945683310396476, "No": 0.0543165559893294}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985706670707282, "res": {"Yes": 0.9985706670707282, "No": 0.0014292440655093326}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990040093282954, "res": {"Yes": 0.9990040093282954, "No": 0.0009959923312076807}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9887314610126998, "res": {"Yes": 0.9887314610126998, "No": 0.011268463450742108}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9961803989048076, "res": {"Yes": 0.9961803989048076, "No": 0.003819632748963864}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986492485870182, "res": {"Yes": 0.9986492485870182, "No": 0.0013506675868272995}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975399785244459, "res": {"Yes": 0.9975399785244459, "No": 0.0024599914614479683}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999436630499856, "res": {"Yes": 0.9999436630499856, "No": 5.624614795638708e-05}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968962766387662, "res": {"Yes": 0.9968962766387662, "No": 0.0031036855082891986}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.37514290335097217, "res": {"No": 0.6248569721466554, "Yes": 0.37514290335097217}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.964731379119719, "res": {"Yes": 0.964731379119719, "No": 0.03526845290600649}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9785087694991637, "res": {"Yes": 0.9785087694991637, "No": 0.021491123275718275}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7355911333163884, "res": {"Yes": 0.7355911333163884, "No": 0.2644087705590811}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9915039937921405, "res": {"Yes": 0.9915039937921405, "No": 0.008495893923705991}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8223590161261277, "res": {"Yes": 0.8223590161261277, "No": 0.17764013297393472}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996425527394996, "res": {"Yes": 0.9996425527394996, "No": 0.0003572701284443972}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996685228307005, "res": {"Yes": 0.9996685228307005, "No": 0.000331397498924296}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999242341303785, "res": {"Yes": 0.9999242341303785, "No": 7.574402458898326e-05}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998746577259993, "res": {"Yes": 0.9998746577259993, "No": 0.0001253074801398653}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.589023822781451, "res": {"Yes": 0.589023822781451, "No": 0.41097594260781867}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8800101125868031, "res": {"Yes": 0.8800101125868031, "No": 0.1199897447822421}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9091320098420941, "res": {"Yes": 0.9091320098420941, "No": 0.09086774329655091}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9751963145458254, "res": {"Yes": 0.9751963145458254, "No": 0.024803601861653084}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8580042895767248, "res": {"Yes": 0.8580042895767248, "No": 0.1419956295033437}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8879865646527294, "res": {"Yes": 0.8879865646527294, "No": 0.11201326244612107}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9176886289939828, "res": {"Yes": 0.9176886289939828, "No": 0.08231124881659468}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9028072305891442, "res": {"Yes": 0.9028072305891442, "No": 0.09719271683540652}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8383009450482508, "res": {"Yes": 0.8383009450482508, "No": 0.1616988701088782}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9783987462880195, "res": {"Yes": 0.9783987462880195, "No": 0.02160121966314097}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8135411416251017, "res": {"Yes": 0.8135411416251017, "No": 0.18645873955584014}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9496012531752971, "res": {"Yes": 0.9496012531752971, "No": 0.05039867973409562}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990475736854196, "res": {"Yes": 0.990475736854196, "No": 0.009524210009920417}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9391807559030374, "res": {"Yes": 0.9391807559030374, "No": 0.06081920306451733}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8486406202412871, "res": {"Yes": 0.8486406202412871, "No": 0.15135920894605534}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9539886832744157, "res": {"Yes": 0.9539886832744157, "No": 0.046011118738227996}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8928186752099422, "res": {"Yes": 0.8928186752099422, "No": 0.10718089119634121}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9272043967525796, "res": {"Yes": 0.9272043967525796, "No": 0.07279539363688552}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.879888371609353, "res": {"Yes": 0.879888371609353, "No": 0.12011145206850161}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961460904449664, "res": {"Yes": 0.9961460904449664, "No": 0.0038537329616990987}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9753175165493629, "res": {"Yes": 0.9753175165493629, "No": 0.024682272637671934}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970960434625923, "res": {"Yes": 0.9970960434625923, "No": 0.0029039714887248444}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9201505178154172, "res": {"Yes": 0.9201505178154172, "No": 0.07984923378871725}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984576411598366, "res": {"Yes": 0.9984576411598366, "No": 0.0015421563726510802}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.958175961490765, "res": {"Yes": 0.958175961490765, "No": 0.041823830259327024}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.31958560716686196, "res": {"No": 0.6804140017285434, "Yes": 0.31958560716686196}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.40837978171574085, "res": {"No": 0.5916197290918357, "Yes": 0.40837978171574085}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7021918511902914, "res": {"Yes": 0.7021918511902914, "No": 0.2978080284186751}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7453561210373716, "res": {"Yes": 0.7453561210373716, "No": 0.2546435385041389}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8915806190292773, "res": {"Yes": 0.8915806190292773, "No": 0.10841917656948394}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9779870757638761, "res": {"Yes": 0.9779870757638761, "No": 0.02201270703205396}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9732013411413434, "res": {"Yes": 0.9732013411413434, "No": 0.026798348942130887}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889514189016969, "res": {"Yes": 0.9889514189016969, "No": 0.01104828573673958}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936990683518537, "res": {"Yes": 0.9936990683518537, "No": 0.006300902511822168}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6924534246538706, "res": {"Yes": 0.6924534246538706, "No": 0.30754637195485196}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4698384813705117, "res": {"No": 0.5301613207373852, "Yes": 0.4698384813705117}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6353609604103908, "res": {"Yes": 0.6353609604103908, "No": 0.36463865575156174}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873120953528401, "res": {"Yes": 0.9873120953528401, "No": 0.01268767790765774}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9174816639947279, "res": {"Yes": 0.9174816639947279, "No": 0.08251807654550737}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9557528026507757, "res": {"Yes": 0.9557528026507757, "No": 0.044246995860103173}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9971275761264625, "res": {"Yes": 0.9971275761264625, "No": 0.0028722545503105197}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992266442937896, "res": {"Yes": 0.9992266442937896, "No": 0.0007733501009068112}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997016436671056, "res": {"Yes": 0.9997016436671056, "No": 0.00029813981608404675}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957402827426112, "res": {"Yes": 0.9957402827426112, "No": 0.004259713817278825}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999644035315693, "res": {"Yes": 0.9999644035315693, "No": 3.5574338808780316e-05}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9917208485200464, "res": {"Yes": 0.9917208485200464, "No": 0.008279146463639577}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945347216722424, "res": {"Yes": 0.9945347216722424, "No": 0.00546529215224639}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9594584061298033, "res": {"Yes": 0.9594584061298033, "No": 0.04054147316759458}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8468373783410331, "res": {"Yes": 0.8468373783410331, "No": 0.15316229012257665}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827294397880086, "res": {"Yes": 0.9827294397880086, "No": 0.017270533423680547}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9428856477667871, "res": {"Yes": 0.9428856477667871, "No": 0.05711409757849073}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8928406388195036, "res": {"Yes": 0.8928406388195036, "No": 0.10715901780870318}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951509408320803, "res": {"Yes": 0.9951509408320803, "No": 0.004848992633690506}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9897487232192627, "res": {"Yes": 0.9897487232192627, "No": 0.01025119494697479}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9676520488421333, "res": {"Yes": 0.9676520488421333, "No": 0.03234791536960917}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9813612357858562, "res": {"Yes": 0.9813612357858562, "No": 0.01863865367629151}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9259558904256996, "res": {"Yes": 0.9259558904256996, "No": 0.07404387915175324}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981706137705666, "res": {"Yes": 0.9981706137705666, "No": 0.0018293704275160565}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9516449770602325, "res": {"Yes": 0.9516449770602325, "No": 0.048354842837473204}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.799223146484095, "res": {"Yes": 0.799223146484095, "No": 0.20077668669931054}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9760101968806537, "res": {"Yes": 0.9760101968806537, "No": 0.02398960421937629}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9818337817254326, "res": {"Yes": 0.9818337817254326, "No": 0.018166035676280865}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946775213507476, "res": {"Yes": 0.9946775213507476, "No": 0.005322271995352872}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998119416517203, "res": {"Yes": 0.998119416517203, "No": 0.0018805020310424232}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9639440591333996, "res": {"Yes": 0.9639440591333996, "No": 0.03605568608318424}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5815276666972619, "res": {"Yes": 0.5815276666972619, "No": 0.41847216609415605}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.20861621644656206, "res": {"No": 0.791383436195726, "Yes": 0.20861621644656206}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7086372496369062, "res": {"Yes": 0.7086372496369062, "No": 0.29136269888100713}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9435398818081072, "res": {"Yes": 0.9435398818081072, "No": 0.05646007092265713}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8448453044850013, "res": {"Yes": 0.8448453044850013, "No": 0.1551544199897723}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9428563980219171, "res": {"Yes": 0.9428563980219171, "No": 0.057143490342334985}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.42323427992727325, "res": {"No": 0.5767651294630687, "Yes": 0.42323427992727325}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9906005397067635, "res": {"Yes": 0.9906005397067635, "No": 0.009399366364595098}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9018352276970744, "res": {"Yes": 0.9018352276970744, "No": 0.09816472932328697}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6658409438583639, "res": {"Yes": 0.6658409438583639, "No": 0.3341589503359901}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9639622296501709, "res": {"Yes": 0.9639622296501709, "No": 0.03603752580349409}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952371368766779, "res": {"Yes": 0.9952371368766779, "No": 0.004762661634988686}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9626634943362891, "res": {"Yes": 0.9626634943362891, "No": 0.037336196773732}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9682632250174468, "res": {"Yes": 0.9682632250174468, "No": 0.03173671393379036}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9345341249059027, "res": {"Yes": 0.9345341249059027, "No": 0.06546559117141718}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9580021967073215, "res": {"Yes": 0.9580021967073215, "No": 0.041997537221556795}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9626084825774718, "res": {"Yes": 0.9626084825774718, "No": 0.037391214820577216}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727705691304905, "res": {"Yes": 0.9727705691304905, "No": 0.02722855609391669}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9827044748464262, "res": {"Yes": 0.9827044748464262, "No": 0.017295343077278744}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943443278725144, "res": {"Yes": 0.9943443278725144, "No": 0.005655623042355945}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1576270152007549, "res": {"No": 0.8423727809477327, "Yes": 0.1576270152007549}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8262181468721769, "res": {"Yes": 0.8262181468721769, "No": 0.17378167656298915}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5840986845708249, "res": {"Yes": 0.5840986845708249, "No": 0.4159010822702256}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9062165353548916, "res": {"Yes": 0.9062165353548916, "No": 0.09378330724009003}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7864639763595233, "res": {"Yes": 0.7864639763595233, "No": 0.21353600902423248}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7897680533384721, "res": {"Yes": 0.7897680533384721, "No": 0.21023185203041544}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6589758115060044, "res": {"Yes": 0.6589758115060044, "No": 0.34102421243878145}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.772988103926653, "res": {"Yes": 0.772988103926653, "No": 0.22701179556712175}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6431418452038845, "res": {"Yes": 0.6431418452038845, "No": 0.35685797695884836}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.39972640236193574, "res": {"No": 0.6002733164922834, "Yes": 0.39972640236193574}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8019380888844946, "res": {"Yes": 0.8019380888844946, "No": 0.19806171597728495}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8640240024072564, "res": {"Yes": 0.8640240024072564, "No": 0.13597561646759893}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8041979690107149, "res": {"Yes": 0.8041979690107149, "No": 0.19580179136001982}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9446317084116891, "res": {"Yes": 0.9446317084116891, "No": 0.05536808136015625}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5981854596014773, "res": {"Yes": 0.5981854596014773, "No": 0.4018141944791234}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.49858178542400045, "res": {"No": 0.5014179972025203, "Yes": 0.49858178542400045}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8731689271251815, "res": {"Yes": 0.8731689271251815, "No": 0.1268307447356185}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7331411506096248, "res": {"Yes": 0.7331411506096248, "No": 0.26685837287090636}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9003093342396836, "res": {"Yes": 0.9003093342396836, "No": 0.09969045936734709}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9032002132877109, "res": {"Yes": 0.9032002132877109, "No": 0.09679944676050281}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9944960448695404, "res": {"Yes": 0.9944960448695404, "No": 0.005503720284752279}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987330710330015, "res": {"Yes": 0.9987330710330015, "No": 0.0012667458369700975}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902102150101664, "res": {"Yes": 0.9902102150101664, "No": 0.00978945395291357}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9822124645673722, "res": {"Yes": 0.9822124645673722, "No": 0.017787518064392348}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972184905856314, "res": {"Yes": 0.9972184905856314, "No": 0.0027814219200766052}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9796793213293571, "res": {"Yes": 0.9796793213293571, "No": 0.020320501021620748}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9798602563955219, "res": {"Yes": 0.9798602563955219, "No": 0.02013956682732968}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9303925677876408, "res": {"Yes": 0.9303925677876408, "No": 0.06960716997955606}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984473012858899, "res": {"Yes": 0.9984473012858899, "No": 0.0015527185206805772}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9725094911498878, "res": {"Yes": 0.9725094911498878, "No": 0.027490274263737104}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.988973567705696, "res": {"Yes": 0.988973567705696, "No": 0.011026222273330654}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.898679900186189, "res": {"Yes": 0.898679900186189, "No": 0.10131991426798277}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9367509379760574, "res": {"Yes": 0.9367509379760574, "No": 0.06324893433341514}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9559920804625864, "res": {"Yes": 0.9559920804625864, "No": 0.04400790069205445}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9890071508513228, "res": {"Yes": 0.9890071508513228, "No": 0.01099270618725929}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7656930711071948, "res": {"Yes": 0.7656930711071948, "No": 0.23430666205635534}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5857301209161555, "res": {"Yes": 0.5857301209161555, "No": 0.41426885819754583}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9553648811219746, "res": {"Yes": 0.9553648811219746, "No": 0.04463496761066023}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9503428193586186, "res": {"Yes": 0.9503428193586186, "No": 0.04965702043486858}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.974545646682679, "res": {"Yes": 0.974545646682679, "No": 0.025454162331169847}, "ground_truth": 0}]