[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.272331375426766e-07}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978660586970486, "res": {"Yes": 0.9978660586970486, "No": 0.00213316923785194}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9718294629048455e-08}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.5198153118570314, "res": {"Yes": 0.5198153118570314, "No": 0.4801824082888678}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999798995780161, "res": {"Yes": 0.9999798995780161, "No": 1.864053588359824e-05}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.315299153884449e-07}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999399679479958, "res": {"Yes": 0.9999399679479958, "No": 5.940982150866352e-05}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1624637218228532e-07}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.0900200327075508e-07}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.128130841581417e-07}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 8.164705449148621e-07}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.975660591530515e-07}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947584471296699, "res": {"Yes": 0.9947584471296699, "No": 0.0052392653021123715}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999639267366558, "res": {"Yes": 0.9999639267366558, "No": 3.597785311737258e-05}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.990661846899404, "res": {"Yes": 0.990661846899404, "No": 0.009337688885745824}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.848192429953257e-06, "res": {"No": 0.9999963494876631, "Yes": 2.848192429953257e-06}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.720481201882619e-08}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.683879994248851e-07}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.846469959384858e-08}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 1.963715924081413e-06}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 8.671412919013139e-08}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999858596579756, "res": {"Yes": 0.9999858596579756, "No": 1.3481286760843214e-05}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 8.491763926627066e-08}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1398186273787965e-07}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.2738306032759614, "res": {"No": 0.7261690354935489, "Yes": 0.2738306032759614}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998074449011132, "res": {"Yes": 0.9998074449011132, "No": 0.00019252279416494996}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 7.331929715600889e-08}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.157606003053507e-08}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.8099073406536958e-07}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997627567966206, "res": {"Yes": 0.9997627567966206, "No": 0.0002367783704810973}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0753816813244956e-07}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.290354610718938e-08}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.243411616741913e-08}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.481114303470635e-08}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9919096465027316, "res": {"Yes": 0.9919096465027316, "No": 0.008089268275699443}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975180350808609, "res": {"Yes": 0.9975180350808609, "No": 0.0024816425983746368}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.933333798845945e-07}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 9.97589467973987e-07}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7288503515971986e-07}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9922209178951861, "res": {"Yes": 0.9922209178951861, "No": 0.007778610155518444}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9513247710041419, "res": {"Yes": 0.9513247710041419, "No": 0.04867478231244633}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.103295334315022e-06}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.83361623527065e-08}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.0819327124412085e-07}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.14496009229424472, "res": {"No": 0.8550391086332058, "Yes": 0.14496009229424472}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.385655574140565e-06}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.258043392539887e-07}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.404919001517395e-07}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9498487781052722, "res": {"Yes": 0.9498487781052722, "No": 0.050150008253396425}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 7.91587036245521e-07}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8823129123380072, "res": {"Yes": 0.8823129123380072, "No": 0.11768647289444789}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.917129822154598e-08}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.113122873831362e-05}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.4644393382754459e-05}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3007630300135551, "res": {"No": 0.699236299659089, "Yes": 0.3007630300135551}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986706434782372, "res": {"Yes": 0.9986706434782372, "No": 0.0013288736171142663}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7805381515786214e-07}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.13411843618653e-07}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.7538855675256068e-07}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.754262375870968e-06}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8134954707074056, "res": {"Yes": 0.8134954707074056, "No": 0.18650375612885065}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.0588428394778859e-07}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.051315908210627e-08}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.546873741153381e-08}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00010964145674479265, "res": {"No": 0.9998901450444976, "Yes": 0.00010964145674479265}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999896741293122, "res": {"Yes": 0.9999896741293122, "No": 1.0255473447770107e-05}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.4995121128382702e-06}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.461823694405959e-08}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.996774859773987, "res": {"Yes": 0.996774859773987, "No": 0.0032247624189200015}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.020174674475493266, "res": {"No": 0.9798249987182583, "Yes": 0.020174674475493266}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.1392353775364734e-05}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999448550185404, "res": {"Yes": 0.9999448550185404, "No": 5.441498404640855e-05}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.272205266255043e-07}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.8309691390773895e-06}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.018941204456082453, "res": {"No": 0.9810583579105896, "Yes": 0.018941204456082453}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.015919057541066865, "res": {"No": 0.9840809156097214, "Yes": 0.015919057541066865}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.7569751241367537e-07}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.73180401938562e-07}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.590249068720523e-07}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00022783747776607374, "res": {"No": 0.9997718141583503, "Yes": 0.00022783747776607374}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.919278449660807e-06, "res": {"No": 0.9999938462231346, "Yes": 5.919278449660807e-06}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 1.9801073451602436e-06}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.2394645414825824e-05}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3047293843521148e-07}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999825220097418, "res": {"Yes": 0.9999825220097418, "No": 1.6974064440500893e-05}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999748931371826, "res": {"Yes": 0.9999748931371826, "No": 2.4669818119196084e-05}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.662537936950414e-08}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.822668978249257e-08}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999886013079656, "res": {"Yes": 0.9999886013079656, "No": 1.102185982864192e-05}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999142218341572, "res": {"Yes": 0.9999142218341572, "No": 8.569783459065584e-05}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933859368425924, "res": {"Yes": 0.9933859368425924, "No": 0.00661378613594838}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.115494597991212e-08}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 7.040734928430152e-06}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.097869604713624e-07}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.219440108115287e-05, "res": {"No": 0.999967264321824, "Yes": 3.219440108115287e-05}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.690671715182232e-08}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999774078524101, "res": {"Yes": 0.999774078524101, "No": 0.00022521005803915892}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999739395305799, "res": {"Yes": 0.9999739395305799, "D": 1.4840850656519276e-05}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8527295578520278, "res": {"Yes": 0.8527295578520278, "No": 0.14726982949738232}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.403399043655354e-07}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.8354956205181096e-08}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.6965898923484e-08}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.025409886831078e-08}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.010647847712214572, "res": {"No": 0.9893519531209857, "Yes": 0.010647847712214572}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.1952657647823604e-06}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.7236974278624956e-07}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.683660015084308e-08}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.394444985006362e-05, "res": {"No": 0.9999756083404814, "Yes": 2.394444985006362e-05}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.7295643534176532e-06}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.422249379107866e-08}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.2830982370724743e-07}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.0033505618536687e-07}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.300235135399298e-06}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997092704700228, "res": {"Yes": 0.9997092704700228, "No": 0.00029027407357465145}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0859678868537874e-07}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.412512228515309e-08}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.1946102449276e-07}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.3179382160912263e-06}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 6.329331680981071e-07}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999930118027176, "res": {"Yes": 0.9999930118027176, "No": 6.101554397400104e-06}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "\"Yes": 1.848945822418856e-07}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "\"Yes": 1.2430444323706983e-06}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.4315439654875377e-06}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5733256869974863e-07}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0189764580273184e-07}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9850866412681863, "res": {"Yes": 0.9850866412681863, "2": 0.008551980375400848}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 5.54005623238855e-07}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.66023540982886e-05, "res": {"No": 0.9999628539429318, "Yes": 3.66023540982886e-05}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 7.499935833484225e-05, "res": {"No": 0.9999243533313535, "Yes": 7.499935833484225e-05}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.853063763160613e-08}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996762685423404, "res": {"Yes": 0.9996762685423404, "No": 0.00032290545066259867}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.464348710962818e-06}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.945970498438046, "res": {"Yes": 0.945970498438046, "No": 0.05402861312854635}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999396103605277, "res": {"Yes": 0.9999396103605277, "No": 5.973395134306581e-05}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.0767029657560186e-06}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999813299942867, "res": {"Yes": 0.9999813299942867, "No": 1.831522274350345e-05}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.957065375311722e-08}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9975672513410104, "res": {"Yes": 0.9975672513410104, "No": 0.0024323072171864653}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0267779044704943e-07}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0070383042109363e-07}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.397058277141187e-08}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.101903330794173e-08}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999933531359138, "res": {"Yes": 0.999933531359138, "No": 6.60339865995689e-05}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954125128117642, "res": {"Yes": 0.9954125128117642, "No": 0.004586888369223263}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 8.388609250220592e-08}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5715991234500122e-07}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.4157814653177229e-07}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.61390487888354e-08}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.030710050503539588, "res": {"No": 0.9692892597149331, "Yes": 0.030710050503539588}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.4725022299773576e-07}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.362955117013053e-07}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3405106868630857e-07}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0007733877639738142, "res": {"No": 0.9992259295871956, "Yes": 0.0007733877639738142}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.484810894758664e-06}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3459272099326356e-07}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.929347486374153e-07}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, " Yes": 4.508365409760703e-07}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0011570685320311637, "res": {"No": 0.9988427080794068, "Yes": 0.0011570685320311637}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982833362556781, "res": {"Yes": 0.9982833362556781, "No": 0.0017147242473659933}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999357960811598, "res": {"Yes": 0.9999357960811598, "No": 6.373032812354899e-05}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 5.985427373838554e-08}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.541084906508838e-07}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.4370618553601151e-07}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.8727361775050302e-06}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.3244945614480714e-07}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.831590666340466e-08}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.633506042163234e-07}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.4324547170378812e-06, "res": {"No": 0.9999978991308068, "Yes": 1.4324547170378812e-06}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0015619831735466032, "res": {"No": 0.9984374408687217, "Yes": 0.0015619831735466032}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.00022872549162174049, "res": {"No": 0.9997708607564074, "Yes": 0.00022872549162174049}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987524511369084, "res": {"Yes": 0.9987524511369084, "No": 0.0012467455285865726}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.06609662449961977, "res": {"No": 0.9339025942530947, "Yes": 0.06609662449961977}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.962600693987513e-08}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.8095797770704367e-07}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "1": 1.4970038106495308e-07}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.151204795143925e-08}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.1049653564005395e-06}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9783923534514967, "res": {"Yes": 0.9783923534514967, "No": 0.02160692529865001}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.7955577821110484e-07}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986871726128439, "res": {"Yes": 0.9986871726128439, "No": 0.0013112960233929174}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9178846315059941, "res": {"Yes": 0.9178846315059941, "No": 0.08211241959248602}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8953772510181294, "res": {"Yes": 0.8953772510181294, "No": 0.10462186654622099}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9151283136876536, "res": {"Yes": 0.9151283136876536, "No": 0.0848708576157299}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.12194372621972e-07}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.5686003792500705e-08}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.773959285380032e-08}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.755556224871183e-08}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.681599117828415e-08}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.7473418324470894e-08}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.5044790377654162e-08}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1476652381324748e-07}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 2.971427740356708e-06}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999577283874698, "res": {"Yes": 0.9999577283874698, "No": 4.175200850167515e-05}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "Service": 1.1144795871392245e-06}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.965462809303534e-08}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.077946901954937e-08}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.24000546141577372, "res": {"No": 0.7599940535566012, "Yes": 0.24000546141577372}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.383506851975748e-06}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.3522862126430872e-07}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.248087227754008e-08}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.58121642556861e-06}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9984650131991997, "res": {"Yes": 0.9984650131991997, "No": 0.001534872443642641}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.545708919424769, "res": {"Yes": 0.545708919424769, "No": 0.45429015824988805}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.309793375125275e-08}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.1379780756951306e-06}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.520604040461759e-07}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9980289412925621, "res": {"Yes": 0.9980289412925621, "No": 0.0019699317135610517}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.863453408001962, "res": {"Yes": 0.863453408001962, "No": 0.1365457576024776}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.7690604163005266e-08}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.159216377227232e-06}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.8316654793273165e-07}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9995121275284962, "res": {"Yes": 0.9995121275284962, "No": 0.00048738285199690517}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.005997313859983025, "res": {"No": 0.9940023976557013, "Yes": 0.005997313859983025}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.16907919620725e-08}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998802522853301, "res": {"Yes": 0.9998802522853301, "No": 0.00011926460045608737}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.2062459769482995e-06}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3372321207922903, "res": {"No": 0.662766690782235, "Yes": 0.3372321207922903}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.008746039584410436, "res": {"No": 0.9912537320960656, "Yes": 0.008746039584410436}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.05102450354376999, "res": {"No": 0.9489750742465732, "Yes": 0.05102450354376999}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.513019948036732e-06}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 1.541821717817159e-07}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3211562545465418, "res": {"No": 0.6788434076761088, "Yes": 0.3211562545465418}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9698980520379586, "res": {"Yes": 0.9698980520379586, "No": 0.030101321636556426}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997954077218552, "res": {"Yes": 0.9997954077218552, "No": 0.00020352555165612145}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.451733209903637e-07}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.5140083208806638e-07}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.185978586260912e-06, "res": {"No": 0.999994561441089, "Yes": 5.185978586260912e-06}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2284063156797981e-07}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.6843852340293024e-07}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.3045623896788046e-07}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.1295479148320924e-06}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0015281055704741676, "res": {"No": 0.9984715482739438, "Yes": 0.0015281055704741676}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.529179258551724e-08}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961855983823507, "res": {"Yes": 0.9961855983823507, "No": 0.003814017436899971}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996626874526736, "res": {"Yes": 0.9996626874526736, "No": 0.00033711427167474646}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.971833157419889, "res": {"Yes": 0.971833157419889, "No": 0.02816641410538617}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.38406250484221e-07}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.983570743940561e-07}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999804955832136, "res": {"Yes": 0.9999804955832136, "No": 1.942938397335653e-05}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.7640241714318975e-07}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.011457699838656996, "res": {"No": 0.988541881033882, "Yes": 0.011457699838656996}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.6126748883626203e-06, "res": {"No": 0.9999967070975216, "Yes": 2.6126748883626203e-06}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999829988145218, "res": {"Yes": 0.9999829988145218, "No": 1.6236544872865694e-05}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.985265909676558e-08}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942040861918369, "res": {"Yes": 0.9942040861918369, "No": 0.005794778647921105}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.04367901491989492, "res": {"No": 0.9563189230945769, "Yes": 0.04367901491989492}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971325535006071, "res": {"Yes": 0.9971325535006071, "No": 0.0028659110108668483}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 9.572022857179797e-06}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989953234257961, "res": {"Yes": 0.9989953234257961, "No": 0.0010038529827084872}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 5.532734350682659e-07}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998955086436021, "res": {"Yes": 0.9998955086436021, "No": 0.00010405098901291812}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.377229338353905e-07}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 9.675235289654554e-06}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999711979237877, "res": {"Yes": 0.9999711979237877, "No": 2.834780374793637e-05}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999675027220479, "res": {"Yes": 0.9999675027220479, "No": 3.194675331461785e-05}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9992489037141375, "res": {"Yes": 0.9992489037141375, "No": 0.0007505286748192976}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967707121027475, "res": {"Yes": 0.9967707121027475, "No": 0.003226977301078865}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.0538038030199064e-07}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.687237329880675e-07}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.517653892935604e-07}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999946523779904, "res": {"Yes": 0.999946523779904, "No": 5.3113947469369615e-05}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.0636798646895464e-05, "res": {"No": 0.9999491461231023, "Yes": 5.0636798646895464e-05}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.433050407326791e-06}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 7.740753683299437e-06}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.939769480940737e-06}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997809907960179, "res": {"Yes": 0.9997809907960179, "No": 0.00021852502917259157}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.372761553465003e-08}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.785384040176197e-08}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1853115494476014e-07}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.547961265050466e-08}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9890417805539614, "res": {"Yes": 0.9890417805539614, "No": 0.010957418100349451}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846150011431789, "res": {"Yes": 0.9846150011431789, "No": 0.015384041086036873}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.799644664639695e-07}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999930118027176, "res": {"Yes": 0.9999930118027176, "No": 6.629076861745172e-06}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.264886342398739e-06}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.152168863330748e-08}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.262283839105838e-07}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.526458831544677e-07}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0415965194849906e-07}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 9.785294954721223e-08}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.580953548072364e-07}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.3381912279499008e-07}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1833621314483574e-07}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.6459778593147752e-07}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0305355833773822e-07}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9992385445544495, "res": {"Yes": 0.9992385445544495, "No": 0.0007606235504193573}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.78005357999229e-05}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.31992651446506e-08}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.824409000254975e-08}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.95324279270998e-08}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9907416395038109, "res": {"Yes": 0.9907416395038109, "No": 0.009256761699163429}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2534408692294798e-05}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.1992766195250926e-06}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.7110148730032704e-08}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 1.788346859521924e-07}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.009834856930023e-06}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 1.0092075964641593e-05}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.555293611408756e-07}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.0500942013084996e-08}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.954364279006672e-08}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.3466198524956144e-06, "res": {"No": 0.9999942038320978, "Yes": 5.3466198524956144e-06}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6376702473303528e-07}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4368477490949787e-07}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.435314014858812e-07}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999835948245663, "res": {"Yes": 0.9999835948245663, "No": 1.6092519842380803e-05}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999809723900273, "res": {"Yes": 0.9999809723900273, "No": 1.8710662676495707e-05}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.027237145540453e-06, "res": {"No": 0.9999944422379444, "Yes": 5.027237145540453e-06}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999425210055068, "res": {"Yes": 0.999425210055068, "No": 0.0005724262224060279}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.933632869375684e-07}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9833894442452795, "res": {"Yes": 0.9833894442452795, "No": 0.016609856413099297}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999773720984959, "res": {"Yes": 0.999773720984959, "No": 0.0002255345941326429}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.477959081661922e-07}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 8.142373676477043e-08}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7580577542290546e-07}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.2480982822959969e-07}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.184428236290687e-06, "res": {"No": 0.9999933694113825, "Yes": 6.184428236290687e-06}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.5336044472440863, "res": {"Yes": 0.5336044472440863, "No": 0.46639202180458444}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.807809865271376e-07}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9809318124877596, "res": {"Yes": 0.9809318124877596, "No": 0.01906517703217024}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.706755513954194e-07}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 2.9461980954755084e-06}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9695686607724888, "res": {"Yes": 0.9695686607724888, "No": 0.030430706442879252}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "YES": 3.430615362140206e-07}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.625436495665775e-08}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999763710200833, "res": {"Yes": 0.999763710200833, "No": 0.00023307482933823957}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9693405568291819, "res": {"Yes": 0.9693405568291819, "No": 0.03065900067009586}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998651226767641, "res": {"Yes": 0.9998651226767641, "No": 0.0001343986062624568}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.8981311967559333e-08}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.393058190423424e-08}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 9.31752228358346e-08}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003454701230608162, "res": {"No": 0.9965451333334405, "Yes": 0.003454701230608162}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994056794284768, "res": {"Yes": 0.9994056794284768, "No": 0.0005936764998525644}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.928994743019038e-08}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0125436710385383e-07}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.490293440537666e-08}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.128369478109753e-07}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.0301637196772e-07}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.52566259542163e-08}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.756346431791673e-08}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.707435189026109e-08}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999509401882631, "res": {"Yes": 0.999509401882631, "No": 0.00049018386624931}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.88917652266639, "res": {"Yes": 0.88917652266639, "No": 0.1108223843781184}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.0080448381568391e-07}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999182744197171, "res": {"Yes": 0.9999182744197171, "No": 8.118752139562526e-05}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.403106160397618e-05}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.17490951428665263, "res": {"No": 0.8250900063435771, "Yes": 0.17490951428665263}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.7745383070397865, "res": {"Yes": 0.7745383070397865, "No": 0.22546025237234366}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 7.542221324113755e-08}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.38045007196045e-08}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 4.0043114812942333e-07}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9993530607155173, "res": {"Yes": 0.9993530607155173, "No": 0.000646523507007254}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0464238197353115e-05}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.25801194184821e-07}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.009734792313751e-07}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.908596807507127e-08}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 4.7155405674440345e-06}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.522936878607995e-07}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 8.649346283727447e-08}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.520860544892449e-08}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.546028390153914e-07}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.5708040954525933e-07}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0015709260386033701, "res": {"No": 0.9984287637861594, "Yes": 0.0015709260386033701}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3883636236275894e-08}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.5372375762362625e-08}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.96080992420654e-08}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.132873896552158e-05, "res": {"No": 0.9999382992035725, "Yes": 6.132873896552158e-05}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999440206399028, "res": {"Yes": 0.9999440206399028, "No": 5.525238782214181e-05}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1908753809770655e-07}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 9.822009589570132e-08}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6262465850635573e-07}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9984090402204348, "res": {"Yes": 0.9984090402204348, "No": 0.001590613481093964}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999703669534548, "res": {"Yes": 0.999703669534548, "No": 0.00029608114579817465}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.2983958720632077e-06}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.72666817111734e-08}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.334686429997569e-07}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.790479615111769, "res": {"Yes": 0.790479615111769, "No": 0.20951538616534857}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.994339141883196, "res": {"Yes": 0.994339141883196, "No": 0.0056603417994214545}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 7.734312906747755e-07}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.3232205265333964e-06}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.571615533943813e-07}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.4257165906630373e-06}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.969146559937523e-06, "res": {"No": 0.9999961110815618, "Yes": 2.969146559937523e-06}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7152668238882188e-07}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4427897110374765e-08}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.3842191038522966e-07}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.5026520164215768e-06, "res": {"No": 0.9999981375378344, "Yes": 1.5026520164215768e-06}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999855020530962, "res": {"Yes": 0.9999855020530962, "No": 1.3856723387216423e-05}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3104723581469754e-07}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6310462025124056e-07}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.4013393662411876e-07}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0055415557297524105, "res": {"No": 0.9944577219072622, "Yes": 0.0055415557297524105}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.002668990414463186, "res": {"No": 0.9973305275951432, "Yes": 0.002668990414463186}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999447358231209, "res": {"Yes": 0.9999447358231209, "No": 5.4451563204908435e-05}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.6089455958093476e-07}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998169757209873, "res": {"Yes": 0.9998169757209873, "No": 0.0001822970606694779}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.9806560840848843e-07}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.2426580212200406e-07}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999621387467553, "res": {"Yes": 0.9999621387467553, "No": 3.747198390761435e-05}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.4717009514978014e-06}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 7.526293209543026e-07}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976129317893673, "res": {"Yes": 0.9976129317893673, "No": 0.00238629911650863}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.063115935103979e-07}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999633307373339, "res": {"Yes": 0.9999633307373339, "No": 3.582917687423176e-05}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.480863925562857e-07}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.165055321336273e-06, "res": {"No": 0.9999946806438478, "Yes": 4.165055321336273e-06}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0019435371831393e-07}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.1606737447590711e-07}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0349131994805611e-06}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 8.380190533737098e-07}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.792005407135285, "res": {"Yes": 0.792005407135285, "No": 0.20799398873727568}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 6.987274443528414e-06, "res": {"No": 0.9999925349918634, "Yes": 6.987274443528414e-06}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.592099463346128e-07}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.514160716432987e-08}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.431061226294219e-07}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.5707429476124227, "res": {"Yes": 0.5707429476124227, "No": 0.4292560220782964}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993580640291145, "res": {"Yes": 0.9993580640291145, "No": 0.000641568627432963}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.9696476146909513e-07}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999876476902904, "res": {"Yes": 0.9999876476902904, "No": 1.0896678935189866e-05}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5518296450643687e-07}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0008310671168536804, "res": {"No": 0.9991685635350042, "Yes": 0.0008310671168536804}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.086531846154315e-07}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.1521843310800805e-07}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "\"Yes": 7.584272869642057e-08}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.850857362257029e-08}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.006200764426093669, "res": {"No": 0.9937990239039499, "Yes": 0.006200764426093669}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.318320067029096e-07}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.226447485630933e-06}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.450693913719893e-06}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.459778781207086e-07}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999694378111887, "res": {"Yes": 0.999694378111887, "No": 0.00030488497001093546}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.7012198591879797e-06, "res": {"No": 0.9999968263007362, "Yes": 1.7012198591879797e-06}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.897023156160694e-08}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.972715200053074e-08}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.09789330049822e-07}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999598739650709, "res": {"Yes": 0.9999598739650709, "No": 3.947709214448496e-05}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988989690798266, "res": {"Yes": 0.9988989690798266, "No": 0.0010999700795025268}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.005867769048157e-08}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0851613123933727e-07}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.131819863445901e-08}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.073364116108077e-05, "res": {"No": 0.9999784691637917, "Yes": 2.073364116108077e-05}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0009882128976284703, "res": {"No": 0.9990110281553205, "Yes": 0.0009882128976284703}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9934767642840484, "res": {"Yes": 0.9934767642840484, "No": 0.006522536140968615}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995531034080428, "res": {"Yes": 0.9995531034080428, "No": 0.0004447734084393063}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999312666422455, "res": {"Yes": 0.9999312666422455, "No": 6.814491618748012e-05}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996354030816162, "res": {"Yes": 0.9996354030816162, "No": 0.00036423204761221786}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.0140641603391258e-07}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.620218588860624e-06}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998821593137591, "res": {"Yes": 0.9998821593137591, "No": 0.00011448748983753684}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.3426891951669945e-05}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9565434405313206, "res": {"Yes": 0.9565434405313206, "No": 0.04345574130919688}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 3.0353737330383343e-06}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 1.8957615875407695e-07}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961731807074063, "res": {"Yes": 0.9961731807074063, "No": 0.003826414791353493}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0926496859989826e-07}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0029927328870040237, "res": {"No": 0.9970070522249794, "Yes": 0.0029927328870040237}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.936169789422204, "res": {"Yes": 0.936169789422204, "No": 0.06382926466439119}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.7779705225557694e-06}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2137558470615725e-06}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988965912036609, "res": {"Yes": 0.9988965912036609, "No": 0.001102500496834321}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.965863291471875, "res": {"Yes": 0.965863291471875, "No": 0.03413480297716808}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954856355143722, "res": {"Yes": 0.9954856355143722, "No": 0.004513524252120998}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.793169539548666e-06}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988636455402717, "res": {"Yes": 0.9988636455402717, "No": 0.00113498970338206}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00013157993767141384, "res": {"No": 0.9998679831849867, "Yes": 0.00013157993767141384}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.7867645403667213e-05, "res": {"No": 0.9999819260003368, "Yes": 1.7867645403667213e-05}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.54151828121122e-07}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.7583113174066395e-07}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.719563794750548e-07}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999800187796273, "res": {"Yes": 0.9999800187796273, "No": 1.9277672442621797e-05}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.96674675703747, "res": {"Yes": 0.96674675703747, "No": 0.03325232132604623}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995943156621536, "res": {"Yes": 0.9995943156621536, "No": 0.00040503381520899347}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999248301053981, "res": {"Yes": 0.9999248301053981, "No": 7.447411072651233e-05}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999291211320257, "res": {"Yes": 0.9999291211320257, "No": 7.079628715169046e-05}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999285251444492, "res": {"Yes": 0.9999285251444492, "No": 7.104280324258338e-05}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.006671328256330399, "res": {"No": 0.9933284204686071, "Yes": 0.006671328256330399}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.5622186875355416e-06}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.2506691184063673e-06}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.543476909132994e-06}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.801376515950348e-07}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9865442341911447, "res": {"Yes": 0.9865442341911447, "No": 0.013454804833993438}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.8281500574149152e-06}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 7.562332323094195e-07}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.8501480278778753e-07}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.047601801549946e-06}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997570363904394, "res": {"Yes": 0.9997570363904394, "No": 0.00024233657313328116}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1298203648842328e-07}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.0776165995952605e-07}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.9113789321310435e-08}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "Tr": 1.9697795855021078e-07}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9980431882573865, "res": {"Yes": 0.9980431882573865, "No": 0.001955964806731375}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.5806616788995036, "res": {"Yes": 0.5806616788995036, "No": 0.41933720396134533}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99970998549234, "res": {"Yes": 0.99970998549234, "No": 0.00028926422910429897}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.014503274187751e-07}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0645047323590865e-07}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.573417747577735e-05, "res": {"No": 0.999943901441583, "Yes": 5.573417747577735e-05}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816479951167805, "res": {"Yes": 0.9816479951167805, "No": 0.01835119654124609}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2126761976947723e-07}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.7907312296059256e-07}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 2.942538519948855e-07}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999026601378318, "res": {"Yes": 0.9999026601378318, "No": 9.625823788916561e-05}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.4963616921317078, "res": {"No": 0.5036380871026804, "Yes": 0.4963616921317078}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.968513714386062e-08}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 8.707124956027432e-07}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.8622019860113403e-07}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8724288111844651, "res": {"Yes": 0.8724288111844651, "No": 0.12757010785179798}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.976549138803505e-05, "res": {"No": 0.9999795419732683, "Yes": 1.976549138803505e-05}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.328142294244035e-07}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 2.9928306670989537e-07}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.946910293201051e-07}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 8.98495541410346e-06}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.016605251138466774, "res": {"No": 0.9833945195312981, "Yes": 0.016605251138466774}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.1167557931565196e-07}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 9.726331000579989e-08}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997401283924773, "res": {"Yes": 0.9997401283924773, "No": 0.0002597279034839982}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.734953839306602e-06}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 9.903297118355032e-08}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 6.655161665924305e-08}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998702477485516, "res": {"Yes": 0.9998702477485516, "No": 0.0001293363089407413}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.522228913104516e-07}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003071649393285155, "res": {"No": 0.9969272013393631, "Yes": 0.003071649393285155}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.561716532950554e-06, "res": {"No": 0.9999973031140366, "Yes": 1.561716532950554e-06}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978780435395311, "res": {"Yes": 0.9978780435395311, "No": 0.002119381453685515}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999183936139823, "res": {"Yes": 0.9999183936139823, "No": 8.106964158745715e-05}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 6.886259559219882e-06, "res": {"No": 0.9999924157887603, "Yes": 6.886259559219882e-06}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, " Yes": 2.7123126597685246e-07}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.360547216967838e-06, "res": {"No": 0.9999944422379444, "Yes": 4.360547216967838e-06}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990288618601624, "res": {"Yes": 0.9990288618601624, "No": 0.0009693401264247553}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.760745221182371e-07}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.243537202412746e-07}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.6287462677356326e-06}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987279553355669, "res": {"Yes": 0.9987279553355669, "No": 0.0012718963541125687}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.4905402403973295e-07}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "Foot": 3.3768305347781777e-06}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.7210190091483716e-06}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.09702105133043094, "res": {"No": 0.9029777824686579, "Yes": 0.09702105133043094}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7672858735974226e-05}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.5731035431921625e-06}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "YES": 1.5218529689939885e-07}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999497421129699, "res": {"Yes": 0.9999497421129699, "No": 4.971852290645942e-05}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.167397069625432e-06}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0002070465315217993, "res": {"No": 0.9997924282260023, "Yes": 0.0002070465315217993}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.5190308881926653e-06}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.102454257601105e-07}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0797209746582237e-07}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9981385430639648, "res": {"Yes": 0.9981385430639648, "No": 0.0018608788553035082}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996824614560386, "res": {"Yes": 0.9996824614560386, "No": 0.0003169980295617007}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "S": 1.2139556392122235e-07}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7306648671630473e-07}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.068718333006177e-07}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0007334864746643392, "res": {"No": 0.9992656880390844, "Yes": 0.0007334864746643392}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.000707293389550155, "res": {"No": 0.9992919873593172, "Yes": 0.000707293389550155}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, " Yes": 3.4751674015289316e-07}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7538222360327524, "res": {"Yes": 0.7538222360327524, "No": 0.24617593887588854}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9786755587900324, "res": {"Yes": 0.9786755587900324, "No": 0.021322790537481748}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8721539783369914, "res": {"Yes": 0.8721539783369914, "No": 0.12784554798828188}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.6841268108859894e-05, "res": {"No": 0.9999728667271139, "Yes": 2.6841268108859894e-05}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999838332276837, "res": {"Yes": 0.9999838332276837, "No": 1.5731035711359707e-05}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995553672784393, "res": {"Yes": 0.9995553672784393, "No": 0.00044411474244005823}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9783963541060093, "res": {"Yes": 0.9783963541060093, "No": 0.021603144417559488}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7004877085779931, "res": {"Yes": 0.7004877085779931, "No": 0.2995113097608116}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.002481699428640844, "res": {"No": 0.9975179161767183, "Yes": 0.002481699428640844}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.389482882518538e-07}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.3676234944501058e-06}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.3749943724216239, "res": {"No": 0.6250052058002649, "Yes": 0.3749943724216239}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999897415702129, "res": {"Yes": 0.999897415702129, "No": 0.0001017548570139502}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.1222869891664491e-06}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999510532879294, "res": {"Yes": 0.9999510532879294, "No": 4.867660662792964e-05}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.236627995339716e-07}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.080663337674466e-06}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996974728005846, "res": {"Yes": 0.9996974728005846, "No": 0.0003013087715214591}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.3694566113757575e-07}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.2180566159906002e-07}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.139174352330569e-08}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.616637614042811e-07}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999628539429318, "res": {"Yes": 0.9999628539429318, "No": 3.6736490695844006e-05}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.429646202220482e-07}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0852481287709697e-07}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.4829206539237062e-06}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.591405300206829e-07}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.342911082441373e-07, "res": {"No": 0.999999091165773, "Yes": 5.342911082441373e-07}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.2843434461513383e-07}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 1.253793368795455e-07}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2541620382376314e-07}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.364895054382339e-08}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6179026516256704e-07}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3661501177281332e-07}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.651685142587639e-08}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "1": 5.866608220774092e-07}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4417009776496827e-07}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.006575823905154838, "res": {"No": 0.9934238203376329, "Yes": 0.006575823905154838}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861864449636161, "res": {"Yes": 0.9861864449636161, "No": 0.013813015211772353}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.105606518559024e-06}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.8025049744254993e-07}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.874289895748145e-07}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.04893864051303761, "res": {"No": 0.9510605106663648, "Yes": 0.04893864051303761}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.191788917451188, "res": {"No": 0.8082103663126614, "Yes": 0.191788917451188}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.205920426720005e-08}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.75899372333239e-07}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.059065970468285e-08}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.16135000975912053, "res": {"No": 0.8386490127764985, "Yes": 0.16135000975912053}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.239396603320445e-07}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.103779774783061e-07}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9882836977306354, "res": {"Yes": 0.9882836977306354, "No": 0.011715838906302596}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.391719095544091e-08}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9953546352384853, "res": {"Yes": 0.9953546352384853, "No": 0.004644760355379457}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.950353036549192, "res": {"Yes": 0.950353036549192, "No": 0.04964638007554461}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.1169990291836165e-07}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 4.672445755443549e-07}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.008494930835837e-07}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.870855036589494e-06, "res": {"No": 0.9999925349918634, "Yes": 6.870855036589494e-06}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.079011382966683e-07}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.47688977706982e-07}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.462604887174681e-07}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.8350621893976814e-07}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9987454378219454, "res": {"Yes": 0.9987454378219454, "No": 0.0012542782141692717}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980646746618383, "res": {"Yes": 0.9980646746618383, "No": 0.0019347754611170982}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2926484179693263e-07}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998566603941031, "res": {"Yes": 0.9998566603941031, "No": 0.00014034021165254553}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.581026085432786e-06}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9900086257625439, "res": {"Yes": 0.9900086257625439, "No": 0.00999074614421657}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.1599937891910813e-07}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996931864469076, "res": {"Yes": 0.9996931864469076, "No": 0.00030642424335703306}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.190379126606832e-08}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.206741378284573e-07}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.682425760330294e-05, "res": {"No": 0.9999027793262361, "Yes": 9.682425760330294e-05}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999863364673469, "res": {"Yes": 0.9999863364673469, "No": 1.3373759729321009e-05}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999776347571058, "res": {"Yes": 0.9999776347571058, "No": 2.1914369385082398e-05}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.845016500235628e-07}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999695291212467, "res": {"Yes": 0.9999695291212467, "No": 2.9975535269873764e-05}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9987397270118591, "res": {"Yes": 0.9987397270118591, "No": 0.001259833764070616}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999250684975053, "res": {"Yes": 0.9999250684975053, "No": 7.476004301519031e-05}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.9330360166464176e-07}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999635045582071, "res": {"Yes": 0.999635045582071, "No": 0.00036467956283922653}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.2096118922664333e-06}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996998561521797, "res": {"Yes": 0.9996998561521797, "No": 0.0002992617348777945}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7982314443010321e-07}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.302863651853091e-07}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4592632002258836e-07}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.79848672784834e-08}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.6456200597686694e-05, "res": {"No": 0.9999732243284747, "Yes": 2.6456200597686694e-05}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993344881513125, "res": {"Yes": 0.9993344881513125, "No": 0.0006649963540200941}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 9.579415311945927e-06}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.945167733464382e-08}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.011273109420639e-06}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 7.458592652602223e-08}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.182953941929747e-07}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.864327665669238e-07}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.6316569082511303e-07}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0912629016921922e-07}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999236381607137, "res": {"Yes": 0.9999236381607137, "No": 7.591623473943707e-05}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.1213569987867037e-06}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "YES": 3.70536120028754e-08}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.880752582563457e-08}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.7232671075531e-08}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.2120849010873275e-05, "res": {"No": 0.9999772771531678, "Yes": 2.2120849010873275e-05}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999720323251027, "res": {"Yes": 0.9999720323251027, "No": 2.7267419319500128e-05}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.0337593436780042e-06}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 1.8392771679979776e-07}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988736326284726, "res": {"Yes": 0.9988736326284726, "No": 0.0011256401307912754}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999872900832717, "res": {"Yes": 0.9999872900832717, "No": 1.207720217877639e-05}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.769061448663234e-07}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 9.171708270547694e-06}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9509211603511328e-07}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.540325674502434e-08}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2353862212636528e-07}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999628539429318, "res": {"Yes": 0.9999628539429318, "No": 3.668483879906544e-05}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6360726609179468e-07}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.330924283620569e-07}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.7547557105447294e-07}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0004046002562051934, "res": {"No": 0.9995950305922557, "Yes": 0.0004046002562051934}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.3974614897337588e-05, "res": {"No": 0.9999756083404814, "Yes": 2.3974614897337588e-05}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 7.656757245043682e-06}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.125820158887282e-07}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.960312044513852e-07}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.4172574784738882e-07}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, " Yes": 4.262115575422476e-07}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 5.415214857986285e-06}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.729290212892521e-07}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.5145177295663998e-06}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.582416447201279e-06}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965402846548899, "res": {"Yes": 0.9965402846548899, "No": 0.003459115702119808}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.166723819264392e-06}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.462375117851028e-08}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 1.8568241051084122e-06}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.8444039278530044e-07}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.0292363864265917e-06}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999876476902904, "res": {"Yes": 0.9999876476902904, "No": 1.2038773404211565e-05}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.899777245358727e-08}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.3926765888390665e-07}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9919776859026571, "res": {"Yes": 0.9919776859026571, "No": 0.008021420925831623}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.21586487028464e-07}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.421894003165787e-08}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.6457171118335494e-08}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.382810446661121e-07}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.040643526717946556, "res": {"No": 0.9593560268264228, "Yes": 0.040643526717946556}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.585815138290501e-06}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.6706548093067953e-07}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.3773917174843487e-07}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.598104011154217e-08}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999840716318578, "res": {"Yes": 0.9999840716318578, "No": 1.499979535432697e-05}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 7.676515582048394e-08}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2127295566213598e-07}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 3.4631499253251333e-06}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.215394073355018e-07}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.06518747041929766, "res": {"No": 0.9348120929477564, "Yes": 0.06518747041929766}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6124604132993198, "res": {"Yes": 0.6124604132993198, "No": 0.3875388023907479}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4090352184548246e-08}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.114842107865902e-08}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.783524669805621e-06}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.44653488134660735, "res": {"No": 0.5534642931141066, "Yes": 0.44653488134660735}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 9.70145283364456e-07}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.1710357937343864e-07}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.469098846942957e-08}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5785893802665128e-07}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9904320048492165, "res": {"Yes": 0.9904320048492165, "No": 0.009567269815075864}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.0259880037256396e-07}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3246557984793511e-07}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0549549430054125e-06}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.582872109353177e-08}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9989316779627432, "res": {"Yes": 0.9989316779627432, "No": 0.0010676272673154448}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928922180204526, "res": {"Yes": 0.9928922180204526, "No": 0.007107252765926154}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 6.833833185867765e-08}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.4358365642584433e-07}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.537564282891013e-08}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.140822826139807e-08}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3664823562554244, "res": {"No": 0.6335171042373671, "Yes": 0.3664823562554244}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999822836059372, "res": {"Yes": 0.9999822836059372, "No": 1.7203322916232112e-05}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 2.914314563419168e-07}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985319250778377, "res": {"Yes": 0.9985319250778377, "No": 0.001466198913802476}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.701410564703701e-07}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.008221587187907666, "res": {"No": 0.9917780605625641, "Yes": 0.008221587187907666}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2054484446632927e-07}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995748971818182, "res": {"Yes": 0.9995748971818182, "4": 0.0004205172738856003}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.926469216920568e-07}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999007530713023, "res": {"Yes": 0.9999007530713023, "No": 9.843648894814902e-05}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 4.851064429931483e-07}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.3014493134889437e-06}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2108754991154005e-07}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.264146586559401e-08}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9322082314028892, "res": {"Yes": 0.9322082314028892, "No": 0.06779014151443433}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.95450936440768e-07, "res": {"No": 0.9999983759447187, "Yes": 9.95450936440768e-07}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.35053239395824e-07}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.9234122376498864e-08}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.3854106076653175e-06}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0024639710070485433, "res": {"No": 0.9975354709507291, "Yes": 0.0024639710070485433}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.343790875876136e-07}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999695291212467, "res": {"Yes": 0.9999695291212467, "No": 2.8749275242361623e-05}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1970062536021692e-07}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.609027772440745e-07}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.5296877826580173e-07}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999471197682087, "res": {"Yes": 0.9999471197682087, "No": 5.2521234682491445e-05}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.7989724841469803e-08}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1999701133694163e-07}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.793625485732533e-08}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.000322902867429326, "res": {"No": 0.9996765068551944, "Yes": 0.000322902867429326}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.98122345008227e-06}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998446263017435, "res": {"Yes": 0.9998446263017435, "No": 0.0001548243979084445}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.441610153349085e-07}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9931543601795777, "res": {"Yes": 0.9931543601795777, "No": 0.006844373849695085}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.1642946639198358e-05}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991516618424543, "res": {"Yes": 0.9991516618424543, "No": 0.0008473662061684938}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.2859702154979923e-05}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.564581930053103e-07}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9944227121422053, "res": {"Yes": 0.9944227121422053, "No": 0.00557632846687551}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.28677951468125273, "res": {"No": 0.7132195583468888, "Yes": 0.28677951468125273}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998508866845447, "res": {"Yes": 0.998508866845447, "No": 0.0014907826924479186}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0003211934398433966, "res": {"No": 0.9996780560150903, "Yes": 0.0003211934398433966}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.407446018306855e-08}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9988532939704063, "res": {"Yes": 0.9988532939704063, "No": 0.001146137712423914}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.4935295514804564e-06}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.549939047982689e-08}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 9.571487728999776e-07}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.1615275070625576e-07}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997718141583503, "res": {"Yes": 0.9997718141583503, "No": 0.00022756127845429205}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993810411641001, "res": {"Yes": 0.9993810411641001, "No": 0.0006187467479088965}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.020533528768576e-06}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 3.844277034977739e-06}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.4347296293393364e-07}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.760052319794904e-06}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.630577563245143e-08}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.918270038590081e-08}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.36889734480877e-06}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.6660633196973206e-08}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.6317785549465196e-06, "res": {"No": 0.9999977799274644, "Yes": 1.6317785549465196e-06}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999695291212467, "res": {"Yes": 0.9999695291212467, "No": 2.9256040288968385e-05}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909418901100594, "res": {"Yes": 0.9909418901100594, "No": 0.00904895226315023}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.40835911619195e-07}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.908675241981232e-07}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9889282094731988, "res": {"Yes": 0.9889282094731988, "No": 0.011071317765562569}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.9016912131304264e-08}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.5315939545558394e-08}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.051598581321428e-08}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.762056479606579e-08}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9698119308544848, "res": {"Yes": 0.9698119308544848, "No": 0.0301875346422695}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "YES": 3.312260026812897e-08}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.8914651567373034e-08}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4990502106611913e-08}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.654468787100164e-08}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.5007358552194586e-06, "res": {"No": 0.9999981375378344, "Yes": 1.5007358552194586e-06}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.06071642648388907, "res": {"No": 0.9392832862272832, "Yes": 0.06071642648388907}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.35726101769778e-08}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5486043013034408, "res": {"Yes": 0.5486043013034408, "No": 0.4513952669146682}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.9191677483432904e-06}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.479685183034415e-07, "res": {"No": 0.9999988527586581, "Yes": 5.479685183034415e-07}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0967438471336515e-07}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.287985467351292e-08}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.196190411657098e-08}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "1": 1.0334054915924839e-07}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9992617571754231, "res": {"Yes": 0.9992617571754231, "No": 0.0007380797329293165}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.861709961899775e-08}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.0128506432537295e-08}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.4328943664190934e-08}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.5629837649072794e-08}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0736090499153486, "res": {"No": 0.9263904693972043, "Yes": 0.0736090499153486}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.5656931776696625e-07}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.8196612625150486e-08}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.479549834306068e-06}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.622609939977693e-08}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.6499624960116795e-06}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999639931190339, "res": {"Yes": 0.999639931190339, "No": 0.00035961016077345207}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.1913168203499026e-07}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.40484484014187e-08}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.541786256325457e-07}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.4479735453760934e-06, "res": {"No": 0.9999965878943212, "Yes": 2.4479735453760934e-06}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.020764038642196152, "res": {"No": 0.9792359365396321, "Yes": 0.020764038642196152}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938689676547258, "res": {"Yes": 0.9938689676547258, "No": 0.006130383314962558}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996169480831258, "res": {"Yes": 0.9996169480831258, "No": 0.0003825416583152158}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.337208406125509e-07}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0017730234957567688, "res": {"No": 0.9982263134444611, "Yes": 0.0017730234957567688}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995646574190928, "res": {"Yes": 0.9995646574190928, "No": 0.0004347106154903682}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 7.182120336527228e-07}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.273288719097528e-06}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999975489140396, "res": {"Yes": 0.999975489140396, "No": 2.334947635525441e-05}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0001316011237468123, "res": {"No": 0.9998675064280487, "Yes": 0.0001316011237468123}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0270005484425963e-05, "res": {"No": 0.9999893165220688, "Yes": 1.0270005484425963e-05}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970742506710762, "res": {"Yes": 0.9970742506710762, "No": 0.0029254323419714953}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.476495576397224e-06}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.9333716834798893e-05, "res": {"No": 0.9999803763825457, "Yes": 1.9333716834798893e-05}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.24985370809931823, "res": {"No": 0.7501457785045818, "Yes": 0.24985370809931823}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.107003600605106e-08}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.773516927460816e-08}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.85212677715888e-08}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5270391294515638e-07}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7742696198433263, "res": {"Yes": 0.7742696198433263, "No": 0.2257296804086789}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.96137068511151e-06}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.700057505962656e-08}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.111809155073679e-06}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.686139569924424e-07}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999983356420506, "res": {"Yes": 0.999983356420506, "No": 1.6269294494947867e-05}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.09241606054653144, "res": {"No": 0.9075835054968717, "Yes": 0.09241606054653144}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.820134965616242e-08}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.649380842568143e-08}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.4226637330432166e-07}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0866576137254894e-06, "res": {"No": 0.9999974223173222, "Yes": 2.0866576137254894e-06}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997703840457793, "res": {"Yes": 0.9997703840457793, "No": 0.00022935787530805894}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999574899895761, "res": {"Yes": 0.9999574899895761, "No": 4.2081598629990595e-05}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.2036126284667603e-06}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.191147415802969e-08}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997039078439373, "res": {"Yes": 0.9997039078439373, "No": 0.0002956883588471488}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00012959984111647182, "res": {"No": 0.9998701285740239, "Yes": 0.00012959984111647182}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.209812758946182e-08}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.760455358152224e-07}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.63538892545635e-07}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9332898759995706, "res": {"Yes": 0.9332898759995706, "No": 0.06670936707591468}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995392894097311, "res": {"Yes": 0.9995392894097311, "No": 0.00046043585786236516}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.3811999508632862e-07}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999975250738268, "res": {"Yes": 0.999975250738268, "No": 2.454753784989225e-05}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2526816882484034e-08}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0012309439067990024, "res": {"No": 0.9987687434191511, "Yes": 0.0012309439067990024}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.11266604386589574, "res": {"No": 0.887333766968971, "Yes": 0.11266604386589574}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999979303571174, "res": {"Yes": 0.999979303571174, "No": 2.0142249172952506e-05}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.5447587729315737e-07}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998832320179115, "res": {"Yes": 0.9998832320179115, "No": 0.0001160378647340786}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9984319698459102, "res": {"Yes": 0.9984319698459102, "No": 0.0015349829584370672}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2253734454394903e-07}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.8921725210469164e-07}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.66549409469576e-08}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.853638944216849e-08}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0011403835794139543, "res": {"No": 0.9988593627218004, "Yes": 0.0011403835794139543}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.7331052349044188, "res": {"Yes": 0.7331052349044188, "No": 0.2668941609782473}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.18792750541750472, "res": {"No": 0.8120709222192992, "Yes": 0.18792750541750472}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 5.406547131665819e-07}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00014209183273329696, "res": {"No": 0.9998572563188488, "Yes": 0.00014209183273329696}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.51773901583693e-07}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999831180165023, "res": {"Yes": 0.9999831180165023, "No": 1.5801147404975975e-05}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0557491935407583e-07}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, " Yes": 2.973256006961465e-07}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.3249393051543578e-07}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 1.1868570170500156e-07}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 8.260951246794928e-08}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.376169786458272e-08}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "YES": 5.72050303068454e-08}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 7.847583507452379e-07}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005030034646077931, "res": {"No": 0.9994966607177397, "Yes": 0.0005030034646077931}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.392158261553319e-08}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.481817893646654e-08}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "1": 3.0474727816344284e-07}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.11046914806946592, "res": {"No": 0.8895298093134021, "Yes": 0.11046914806946592}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982063596988235, "res": {"Yes": 0.9982063596988235, "No": 0.0017927652836202978}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 2.362188590055097e-06}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 1.7881429996001748e-07}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999301938865601, "res": {"Yes": 0.9999301938865601, "No": 6.924199605569055e-05}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997255894374677, "res": {"Yes": 0.9997255894374677, "No": 0.00027347865035729093}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.345677531670809e-08}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999529604543941, "res": {"Yes": 0.9999529604543941, "No": 4.679908062493407e-05}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.9183306101871705e-08}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.7540030249295574e-08}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.012941992782857695, "res": {"No": 0.9870577881221829, "Yes": 0.012941992782857695}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.837125971660941e-06, "res": {"No": 0.9999967070975216, "Yes": 2.837125971660941e-06}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "YES": 2.518596119769022e-07}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "YES": 2.0143018339325132e-07}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.769703795607822e-08}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.4597677584106486e-05, "res": {"No": 0.9999649995325307, "Yes": 3.4597677584106486e-05}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.6753107931768682e-06}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994990399224226, "res": {"Yes": 0.9994990399224226, "No": 0.0004967274860776522}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.015767853502269474, "res": {"No": 0.9840851845619927, "Yes": 0.015767853502269474}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999357960811598, "res": {"Yes": 0.9999357960811598, "2": 4.892333876427751e-05}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0006120646767939066, "res": {"No": 0.9993872322788702, "Yes": 0.0006120646767939066}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999417558982535, "res": {"Yes": 0.9999417558982535, "No": 5.803878871483845e-05}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.193446667631049e-08}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.615736365907664e-07}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838837786830594, "res": {"Yes": 0.9838837786830594, "No": 0.016111902410007102}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.06791172356792251, "res": {"No": 0.932086516259724, "Yes": 0.06791172356792251}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0010699348716260903, "res": {"No": 0.9989294154849478, "Yes": 0.0010699348716260903}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.358480177021321e-06}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4545150672597665e-06}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9597464442299923, "res": {"Yes": 0.9597464442299923, "No": 0.04025278842006493}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 9.204344105254079e-06}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999850252451228, "res": {"Yes": 0.9999850252451228, "No": 1.475795940563992e-05}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999435438517084, "res": {"Yes": 0.9999435438517084, "No": 5.6063084070311065e-05}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.9120817569041583e-07}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.22700686946460635, "res": {"No": 0.7729919534169957, "Yes": 0.22700686946460635}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0004690330138458404, "res": {"No": 0.9995304724325839, "Yes": 0.0004690330138458404}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.419864874498938e-08}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 7.80271495239142e-08}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.0113691331517983e-07}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999775155557789, "res": {"Yes": 0.9999775155557789, "No": 2.1760873376376115e-05}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9115523314771584e-07}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.008512105093059566, "res": {"No": 0.9914874665498147, "Yes": 0.008512105093059566}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.2900749767390847e-06}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.013685206412867e-07}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00011590646828348004, "res": {"No": 0.9998834704001013, "Yes": 0.00011590646828348004}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.1728878950538021, "res": {"No": 0.8271115234725989, "Yes": 0.1728878950538021}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999757275415809, "res": {"Yes": 0.9999757275415809, "No": 2.4091342846245897e-05}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999648803297101, "res": {"Yes": 0.9999648803297101, "No": 3.496779720028932e-05}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998358066312069, "res": {"Yes": 0.9998358066312069, "No": 0.00016383233510727365}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999975250738268, "res": {"Yes": 0.999975250738268, "No": 2.4283680927239986e-05}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9721686726924167, "res": {"Yes": 0.9721686726924167, "No": 0.027830570616705696}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999732243284747, "res": {"Yes": 0.9999732243284747, "No": 2.60740236940722e-05}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 5.415691417862114e-06}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 4.4479021495954424e-06}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.6755060979933105e-06}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.99986452674733, "res": {"Yes": 0.99986452674733, "No": 0.0001348174387866339}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0008093792193446861, "res": {"No": 0.9991901070704631, "Yes": 0.0008093792193446861}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978464689824488, "res": {"Yes": 0.9978464689824488, "No": 0.002152204743880126}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0153064265090981e-05}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.683839581613943e-07}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9099935840414476, "res": {"Yes": 0.9099935840414476, "No": 0.09000572496074508}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.90370750532519e-08}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 1.973452521799287e-06}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 3.814317123560514e-07}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.3409665379869182e-07}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00014687656761491248, "res": {"No": 0.999852727245697, "Yes": 0.00014687656761491248}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999841908319662, "res": {"Yes": 0.9999841908319662, "No": 1.531282263255734e-05}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.679530457458863e-07}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.5013738035123366e-06}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5111178137386913e-07}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.11082323771376343, "res": {"No": 0.8891764337487422, "Yes": 0.11082323771376343}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.206819773227465e-07}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.6271162033150318e-08}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.6974415344850648e-08}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.98714997324694e-08}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 3.901751512021081e-06, "res": {"No": 0.9999956342685299, "Yes": 3.901751512021081e-06}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9632223864031905, "res": {"Yes": 0.9632223864031905, "2": 0.03672709547524776}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 9.416434040638848e-05, "res": {"No": 0.9999051631772958, "Yes": 9.416434040638848e-05}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.02070531186073989, "res": {"No": 0.9792936710546529, "Yes": 0.02070531186073989}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999813299942867, "res": {"Yes": 0.9999813299942867, "No": 1.801278093664422e-05}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.753699665508429e-07}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999827604126034, "res": {"Yes": 0.9999827604126034, "1": 1.5117538471048482e-05}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.208260301850704e-06}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.063558808711917e-06, "res": {"No": 0.9999944422379444, "Yes": 5.063558808711917e-06}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.250318915230988e-07}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.74290011949023e-08}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.299878790331851e-08}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.5992754781720153e-08}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8961461154710744, "res": {"Yes": 0.8961461154710744, "No": 0.1038528485054078}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.7005232700750706e-06}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.2550664984142678e-06}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.5229462802936197e-07}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.153633645146379e-08}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999853323178097, "res": {"Yes": 0.999853323178097, "No": 0.00014494981194150045}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.649197527363482e-08}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.40861232422407e-06}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999356768848203, "res": {"Yes": 0.9999356768848203, "1": 6.342598008104835e-05}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978222552344687, "res": {"Yes": 0.9978222552344687, "No": 0.0021765496181749572}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9989867568778287, "res": {"Yes": 0.9989867568778287, "No": 0.0010124896310239686}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9910780499246757, "res": {"Yes": 0.9910780499246757, "No": 0.008921516817871185}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0478354476445602e-06}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.30343051068814e-08}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 7.089707971851575e-07}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998235307425315, "res": {"Yes": 0.9998235307425315, "No": 0.0001761071396723072}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6626401144327219, "res": {"Yes": 0.6626401144327219, "No": 0.3373594162138125}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.461554301733757e-08}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.438675017595224e-08}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.4558053296028113e-08}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9013974749347827, "res": {"Yes": 0.9013974749347827, "No": 0.09860185541048867}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999309090568892, "res": {"Yes": 0.9999309090568892, "No": 6.866801789183373e-05}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.830540071976393e-08}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.308300404704799e-08}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "\"Yes": 1.7987206297800923e-07}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.392176660265863e-05, "res": {"No": 0.9999856212553752, "Yes": 1.392176660265863e-05}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 8.662622951618044e-06, "res": {"No": 0.9999911045595646, "Yes": 8.662622951618044e-06}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.8061306960965627e-06}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.3326382858974683e-07}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.058034126116452e-07}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.055024881782744874, "res": {"No": 0.9449744584017099, "Yes": 0.055024881782744874}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0100871870691468e-07}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.0650979340083132e-07}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.559285730318957e-08}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999148177982098, "res": {"Yes": 0.9999148177982098, "No": 8.445007724733405e-05}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0006037610807239174, "res": {"No": 0.9993959178311462, "Yes": 0.0006037610807239174}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.345873921699555e-06}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.540288670382642e-08}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977499819938991, "res": {"Yes": 0.9977499819938991, "No": 0.0022480491124816336}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.836546120492536e-07}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0017061771430271937, "res": {"No": 0.9982935591286978, "Yes": 0.0017061771430271937}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.411900673792155e-08}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.0910254134161705e-07}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999413983091462, "res": {"Yes": 0.9999413983091462, "No": 5.778030987070591e-05}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999715555225518, "res": {"Yes": 0.9999715555225518, "No": 2.7542232611331796e-05}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998906218122302, "res": {"Yes": 0.9998906218122302, "No": 0.00010892652343143006}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932226793334912, "res": {"Yes": 0.9932226793334912, "No": 0.006776337599790847}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.723411866724778e-08}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.848699251511286e-08}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.958834817450606e-08}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.239072652552477e-06, "res": {"No": 0.9999934886141991, "Yes": 6.239072652552477e-06}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 1.9039707197422142e-06}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "yes": 4.3859824602571345e-07}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957414659814922, "res": {"Yes": 0.9957414659814922, "No": 0.004258336874396511}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.856779140938159e-08}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996338649837903, "res": {"Yes": 0.9996338649837903, "No": 0.0003619440196310399}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0006369371964199667, "res": {"No": 0.9993628218842474, "Yes": 0.0006369371964199667}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996915181203224, "res": {"Yes": 0.9996915181203224, "No": 0.0003079217086493667}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "1": 6.081744371061002e-06}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9890427161879284, "res": {"Yes": 0.9890427161879284, "No": 0.010956257771213134}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.6537461452418287, "res": {"Yes": 0.6537461452418287, "No": 0.34625360845645653}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.013450760927489935, "res": {"No": 0.9865489982247543, "Yes": 0.013450760927489935}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.5976200253699565e-07}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.6830621725616663e-08}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.1736284601672427e-06}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973431004219551, "res": {"Yes": 0.9973431004219551, "No": 0.0026566217025603607}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.09591420286643e-08}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.7300757122835543e-08}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.3051748126920555e-08}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.512518769870129e-08}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.015409532000014938, "res": {"No": 0.9845888845766461, "Yes": 0.015409532000014938}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.232228909426901e-07}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, " Yes": 5.175891790431446e-07}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.398212392117688e-06}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.997294772012837e-07}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0519763688136461e-07}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.54757312840177e-06}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.7919056293237737e-07}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.131667955967341e-08}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.011427311993555395, "res": {"No": 0.9885715960608837, "Yes": 0.011427311993555395}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.41458856092110646, "res": {"No": 0.5854067349043769, "Yes": 0.41458856092110646}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7721502698013208e-07}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 3.10597181700391e-07}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.1056801333944257e-07}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999670259236568, "res": {"Yes": 0.9999670259236568, "No": 3.227466502698718e-05}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.3734408369446953e-07}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.5741905965488205e-08}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999852636485811, "res": {"Yes": 0.9999852636485811, "The": 6.739680754250902e-06}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.465430610315286e-07}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9588970895727136, "res": {"Yes": 0.9588970895727136, "No": 0.04110232046792913}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.039869549657694e-08}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.2646375482030988e-06}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.1734330122530739e-07}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.419881411013368e-08}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00019849155556027402, "res": {"No": 0.9998011283375913, "Yes": 0.00019849155556027402}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.292118567030318e-07}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8099280195641293, "res": {"Yes": 0.8099280195641293, "No": 0.1900711506149203}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.884152230627652e-08}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998863309608148, "res": {"Yes": 0.9998863309608148, "No": 0.00011342848587765027}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996117051659595, "res": {"Yes": 0.9996117051659595, "No": 0.00038796982188997524}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993766333737569, "res": {"Yes": 0.9993766333737569, "No": 0.0006233446386977265}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.762460495160946e-08}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.417753309484832e-06}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 5.7923237985364824e-06}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.774792693251041e-06}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.5578566224950174e-07}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.392794177942879e-07}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.7417474837684386e-08}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.1016041404482046e-07}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998986076205548, "res": {"Yes": 0.9998986076205548, "No": 0.00010081907339887104}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999441398352371, "res": {"Yes": 0.9999441398352371, "No": 5.55072452881392e-05}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 2.2876901741636573e-07}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.58428496548959e-08}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992021259708942, "res": {"Yes": 0.9992021259708942, "No": 0.0007970636662444947}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.4202136275470072, "res": {"No": 0.5797858544989167, "Yes": 0.4202136275470072}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.4687134927142905e-06, "res": {"No": 0.999995276659155, "Yes": 4.4687134927142905e-06}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.656458969285968e-08}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0013449271916443338, "res": {"No": 0.9986548398400833, "Yes": 0.0013449271916443338}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970632301699954, "res": {"Yes": 0.9970632301699954, "No": 0.0029356851844023098}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.017276484147398307, "res": {"No": 0.9827230098103198, "Yes": 0.017276484147398307}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.5456903684366304e-07}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.012949704100001e-06}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7378498851953195e-07}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.0101031607262569e-05}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.983452735259752, "res": {"Yes": 0.983452735259752, "No": 0.016546924917701638}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.4637594933524027e-06}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.2777557794482977e-07}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.470948800856149e-08}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.9987213497076867e-07}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0592737777174202e-06}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999602315637852, "res": {"Yes": 0.9999602315637852, "No": 3.8932584895403384e-05}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0107267746666804e-07}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.158753259638314e-07}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0684838178837086e-07}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9990708641143058, "res": {"Yes": 0.9990708641143058, "No": 0.0009286951700131728}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0029627215218749016, "res": {"No": 0.9970370408534974, "Yes": 0.0029627215218749016}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.3142636063581916e-07}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1075159791487168e-07}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.457334310205391e-06}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 6.93895192701708e-08}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.4176021943743355e-06}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 7.489909022737172e-07}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.240541034975e-08}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 3.360906236853695e-07}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.005551300801708337, "res": {"No": 0.9944485266964485, "Yes": 0.005551300801708337}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 8.12371015102274e-05, "res": {"No": 0.9999183936139823, "Yes": 8.12371015102274e-05}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9555996192226051, "res": {"Yes": 0.9555996192226051, "No": 0.044400169144073916}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.030765272206164e-06}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9619488184559672, "res": {"Yes": 0.9619488184559672, "No": 0.03805071133901164}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.000537406535781865, "res": {"No": 0.9994620051711444, "Yes": 0.000537406535781865}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.609452641063785e-08}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.027320208247938e-06}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.671744958911527e-08}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.50717714533218e-08}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.174936559732198e-07}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 4.704627307842826e-06}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "\"Yes": 1.0741349618112771e-07}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 8.496189288339456e-08}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3227523123756556e-07}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.970043653807636e-07}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.001973943421965277, "res": {"No": 0.9980258553918464, "Yes": 0.001973943421965277}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.302389495785457e-07}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.467804683821244e-08}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.8631681141011754e-08}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999776347571058, "res": {"Yes": 0.9999776347571058, "No": 2.2102562631201468e-05}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.4965877014320317e-07}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.10062206563224e-08}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.7947157465967894e-07}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 6.7480836303753905e-06}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00014416385836490083, "res": {"No": 0.9998555877184575, "Yes": 0.00014416385836490083}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.40542916753771796, "res": {"No": 0.5945701925085805, "Yes": 0.40542916753771796}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.4163705724767614e-07}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.8229237186382476e-07}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 1.6926132746136076e-07}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9990458805017415, "res": {"Yes": 0.9990458805017415, "No": 0.0009534147923002818}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984849758096116, "res": {"Yes": 0.9984849758096116, "No": 0.0015144302248015513}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.3159537013750783e-07}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999826412106655, "res": {"Yes": 0.9999826412106655, "No": 1.709319716872369e-05}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.657988545818275e-07}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9812527601477974, "res": {"Yes": 0.9812527601477974, "No": 0.018746466456107235}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.125040112378628e-08}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.3940841190167315e-08}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.2395502980281312e-07}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 2.87675997151425e-07}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999387759842665, "res": {"Yes": 0.9999387759842665, "No": 6.032354898723908e-05}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.83088947866556e-06}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9783136234298534, "res": {"Yes": 0.9783136234298534, "No": 0.0216854489208986}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.5311395973560256e-07}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 1.9121046721216677e-06}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999284059529762, "res": {"Yes": 0.9999284059529762, "No": 7.076648818792165e-05}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.8770350571837594e-05, "res": {"No": 0.9999509340887712, "Yes": 4.8770350571837594e-05}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24905159379132116, "res": {"No": 0.7509475874497499, "Yes": 0.24905159379132116}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999912237625114, "res": {"Yes": 0.9999912237625114, "No": 7.717871070720297e-06}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.8034956963084975, "res": {"Yes": 0.8034956963084975, "No": 0.19650287828878943}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 3.7383732539379093e-06}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.406207032670634e-08}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.468664744544959e-07}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.8262517657573236e-07}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.989184935127182e-08}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7180357818393874e-07}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.09739302332557567, "res": {"No": 0.902606446021462, "Yes": 0.09739302332557567}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.31649376848142e-08}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 9.807748572774173e-08}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982375995547844, "res": {"Yes": 0.9982375995547844, "No": 0.001761891731013399}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.16255179329923983, "res": {"No": 0.8374478046759881, "Yes": 0.16255179329923983}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.3524579638980678e-07}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.7508354098865334e-07}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.1508135467038e-08}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2070563788900963e-07}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.260704448644016e-07}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.966225115911536e-08}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2056208361165742e-07}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.0801492604249863e-07}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.157333391008208e-06}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.002479932343559889, "res": {"No": 0.9975187485060147, "Yes": 0.002479932343559889}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.5961434815705744, "res": {"Yes": 0.5961434815705744, "No": 0.403855838539548}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.00662063296205788, "res": {"No": 0.9933789945892307, "Yes": 0.00662063296205788}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9893598897341873, "res": {"Yes": 0.9893598897341873, "No": 0.010639610530658606}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.006242603225649542, "res": {"No": 0.9937569914668544, "Yes": 0.006242603225649542}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.10279540761137927, "res": {"No": 0.8967825414685907, "Yes": 0.10279540761137927}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.099078009483074e-07}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.0042225408674579e-07}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.4231782260530626e-06}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.376583425803004e-07}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.0359532624660138e-05, "res": {"No": 0.9999684563215123, "Yes": 3.0359532624660138e-05}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6967894807990725, "res": {"Yes": 0.6967894807990725, "No": 0.3032099224472169}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99997334352929, "res": {"Yes": 0.99997334352929, "No": 2.614020519374608e-05}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999652379302147, "res": {"Yes": 0.9999652379302147, "Authorities": 3.231493663840698e-05}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.474697505440498e-07}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.7213758980544365e-05, "res": {"No": 0.9999622579442493, "Yes": 3.7213758980544365e-05}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.423449121640123e-06}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998933632061071, "res": {"Yes": 0.9998933632061071, "No": 0.00010647354579088392}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.2510705827525086e-07}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.1361832864134952e-07}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999965953125608, "res": {"Yes": 0.999965953125608, "No": 3.294279452462741e-05}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.462653446711428e-07}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.822885938088871e-08}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.978702976993024e-07}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.0196310376465017e-07}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.5175713132267011, "res": {"Yes": 0.5175713132267011, "No": 0.4824276585066363}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.693783338294425e-07}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.0521666776983769e-07}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6118597731801716e-07}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.749207027263571e-06}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0012329575297997644, "res": {"No": 0.9987668385692201, "Yes": 0.0012329575297997644}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8381678501908765, "res": {"Yes": 0.8381678501908765, "No": 0.16183177649102515}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.33033152913154e-08}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.086465831300509e-08}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.9470663481584336e-08}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00027425286834334976, "res": {"No": 0.9997251163874251, "Yes": 0.00027425286834334976}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0002808078304453619, "res": {"No": 0.9997188003440366, "Yes": 0.0002808078304453619}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.203854199263865e-08}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.075090258809984e-07}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2856580018172338e-07}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.1983914658352163, "res": {"No": 0.8016080309913016, "Yes": 0.1983914658352163}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999810662776555, "res": {"Yes": 0.999810662776555, "No": 0.00018904471189690783}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.191426883941542e-07}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.5845645709732744e-07}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.019774161718686e-08}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.009195123406241327, "res": {"No": 0.9908044837437021, "Yes": 0.009195123406241327}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.551913301809106e-06, "res": {"No": 0.9999977799274644, "Yes": 1.551913301809106e-06}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24792115667411377, "res": {"No": 0.7520783286633789, "Yes": 0.24792115667411377}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999434246564451, "res": {"Yes": 0.9999434246564451, "No": 5.5443448646157854e-05}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.6643790061175394, "res": {"Yes": 0.6643790061175394, "No": 0.33561886272305624}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.45086341507138e-05, "res": {"No": 0.9999651187283657, "Yes": 3.45086341507138e-05}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.6199651731510847e-07}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.5264650706687057e-07}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6031551415684112e-07}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0154284594483611e-07}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.931026074982292e-06}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.710233890468225, "res": {"Yes": 0.710233890468225, "No": 0.2897654431484007}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.343077959639725e-08}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.763954768715266e-08}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.964321415610076e-08}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.02018629938454e-07}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.965834840659707e-06}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.355262615921253e-08}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.432347505182585e-06}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.17251139270929705, "res": {"No": 0.8274855037593104, "Yes": 0.17251139270929705}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998980116571646, "res": {"Yes": 0.9998980116571646, "No": 0.00010166281052567094}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9623319716204196, "res": {"Yes": 0.9623319716204196, "No": 0.037667331449194125}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996418377554159, "res": {"Yes": 0.9996418377554159, "No": 0.000357679043578747}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.164685694254828e-08}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999979303571174, "res": {"Yes": 0.999979303571174, "No": 2.044504506384875e-05}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999816875976741, "res": {"Yes": 0.9999816875976741, "No": 1.8248567028588537e-05}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.245027883102608e-06}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9682817790551302, "res": {"Yes": 0.9682817790551302, "No": 0.03171795990548322}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.226505623689241e-06}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995982477740469, "res": {"Yes": 0.9995982477740469, "No": 0.0004013410473405993}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0022962842636006075, "res": {"No": 0.9977029869812627, "Yes": 0.0022962842636006075}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977743129227447, "res": {"Yes": 0.9977743129227447, "No": 0.002225144325665426}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.3916770879643296e-06}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.468477715078831e-07}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.4540397837742886e-06}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999831180165023, "res": {"Yes": 0.9999831180165023, "No": 1.640044646547665e-05}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 7.589409160813187e-08}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.8518157132588904e-07}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.5052374366276583e-07}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.4443016341817825e-08}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.07588061720400491, "res": {"No": 0.9241189702009626, "Yes": 0.07588061720400491}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0035610885798603074, "res": {"No": 0.996438365221558, "Yes": 0.0035610885798603074}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983871747028433, "res": {"Yes": 0.9983871747028433, "No": 0.0016115394124681494}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.2334012572454583e-07}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.367781777407624e-06}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00042128078451520253, "res": {"No": 0.9995784717679996, "Yes": 0.00042128078451520253}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.16688316870241124, "res": {"No": 0.8331165755235537, "Yes": 0.16688316870241124}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.788796934765439e-08}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.605338124279621e-05}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9024254999479943e-07}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9757613284465795, "res": {"Yes": 0.9757613284465795, "No": 0.02423797647547734}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9704531185804407, "res": {"Yes": 0.9704531185804407, "No": 0.029546145014567933}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.399518289550464e-08}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999520068687072, "res": {"Yes": 0.9999520068687072, "No": 4.5112914495345184e-05}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0325625766674052e-07}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998210279174143, "res": {"Yes": 0.9998210279174143, "No": 0.00017850231279389054}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.889455246129421e-06}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.132455449536718e-06}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.91800001800855e-07}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964235615456646, "res": {"Yes": 0.9964235615456646, "No": 0.003576009732708273}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9921855944670664, "res": {"Yes": 0.9921855944670664, "No": 0.007813486499150321}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.7276828202104263e-05, "res": {"No": 0.9999723899261651, "Yes": 2.7276828202104263e-05}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6585520221052995, "res": {"Yes": 0.6585520221052995, "No": 0.34144642284343496}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 6.951090529342547e-06}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.4891666243370792e-07}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999546292272846, "res": {"Yes": 0.9999546292272846, "No": 4.40090615909923e-05}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.3724322874102023e-06}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998008899750327, "res": {"Yes": 0.9998008899750327, "No": 0.00019792863926198102}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.223334172764756e-08}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.396025071093849e-07}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.997879233010868, "res": {"Yes": 0.997879233010868, "No": 0.0021204520116127504}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.949603923696142e-06}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999829988145218, "res": {"Yes": 0.9999829988145218, "No": 1.6665547390988986e-05}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.4866221871580157e-06}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.79987717542112e-08}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.3854550629194082e-07}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.77989359268753e-07}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2690137931303992e-06}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.5623670381479184e-08}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.0674162742359293e-07}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999649995325307, "res": {"Yes": 0.9999649995325307, "No": 3.444006469232016e-05}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0013569876488178118, "res": {"No": 0.9986423550349128, "Yes": 0.0013569876488178118}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 8.906039578875695e-07}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0325316002547513e-07}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.812614986991151e-07}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 6.406067902495168e-07}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998831128318374, "res": {"Yes": 0.9998831128318374, "No": 0.00011593869476093563}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.8355894405941162e-07}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999779923581718, "res": {"Yes": 0.9999779923581718, "No": 2.157720768840693e-05}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 4.637398579126507e-06}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.392224791277046e-08}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934807656191221, "res": {"Yes": 0.9934807656191221, "No": 0.0065185305774520174}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.3520589219083227e-06}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1058404686377919e-07}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0150528204158715e-07}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.255724496385671, "res": {"No": 0.744274939555291, "Yes": 0.255724496385671}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.2213002711730996e-05, "res": {"No": 0.9999474773593622, "Yes": 5.2213002711730996e-05}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0015575152749488046, "res": {"No": 0.9984420752267027, "Yes": 0.0015575152749488046}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 6.031505518433086e-07}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.515401849460729e-07}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8728489344352759, "res": {"Yes": 0.8728489344352759, "No": 0.12715043067227555}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999874092852638, "res": {"Yes": 0.9999874092852638, "No": 1.1450238482671643e-05}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5038111477055005e-07}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.709663119277441e-08}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.343909477720437e-08}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996549418762527, "res": {"Yes": 0.9996549418762527, "No": 0.00034470209731334977}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.823659252227568e-07}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, " Yes": 2.5315598416472225e-07}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.927893120436214e-08}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4228439507803816e-07}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0002155947650647408, "res": {"No": 0.9997842085863171, "Yes": 0.0002155947650647408}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998837087823479, "res": {"Yes": 0.9998837087823479, "No": 0.00011556100333859837}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997992214587097, "res": {"Yes": 0.9997992214587097, "No": 0.00020005634621899783}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 1.789307459683546e-07}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.4582740194895081e-06}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.0881809674402087e-05}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.455283253998731e-06}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.070815653383905e-07}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.7751523667987854e-08}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.0999371771983456e-07}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8808483251613192, "res": {"Yes": 0.8808483251613192, "No": 0.11915074772071065}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.2596783424284225e-07}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.882017649283905e-08}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.7500191563695386e-07}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.938693728889152e-08}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.001443095239974598, "res": {"No": 0.9985564013923657, "Yes": 0.001443095239974598}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 7.414744108170681e-05, "res": {"No": 0.9999253068846696, "Yes": 7.414744108170681e-05}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0001730212739876385, "res": {"No": 0.9998265103110769, "Yes": 0.0001730212739876385}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.049327349261342086, "res": {"No": 0.9506722814309153, "Yes": 0.049327349261342086}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.715563704942127e-06}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.94913452114654, "res": {"Yes": 0.94913452114654, "No": 0.05086475659815066}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999798995780161, "res": {"Yes": 0.9999798995780161, "No": 1.925790369379796e-05}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.163510759599494e-07}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999454510038506, "res": {"Yes": 0.9999454510038506, "No": 5.333509503701868e-05}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.2814031057783824e-07}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9993862792436722, "res": {"Yes": 0.9993862792436722, "No": 0.0006129963968137924}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.671392344276185e-07, "res": {"No": 0.9999988527586581, "Yes": 5.671392344276185e-07}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.1424384671449396e-07}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.203183936465714e-06}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2008326821599965e-08}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.647871604421897e-08}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.266146012700659e-07}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.675619858254634e-08}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.4687427420361043e-08}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 7.363657438970356e-08}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, " Yes": 3.3845489605033577e-07}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.252927436652178e-08}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.212716371340938e-08}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1690069771603681e-07}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999367496503877, "res": {"Yes": 0.9999367496503877, "No": 6.293365384864346e-05}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.864940430959332e-06}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999860980626328, "res": {"Yes": 0.9999860980626328, "No": 1.32405826715305e-05}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.328439109253353e-07}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.956008096332067, "res": {"Yes": 0.956008096332067, "No": 0.04399100930148864}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "YES": 7.157298826747003e-07}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.11194041031894032, "res": {"No": 0.8880592280142042, "Yes": 0.11194041031894032}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.372630466808968e-06}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.377778818942171e-07}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.4228709850722715e-07}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.099142802602781e-06}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.547548279195619, "res": {"Yes": 0.547548279195619, "No": 0.45245016703778174}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.02140177535246059, "res": {"No": 0.9785980399905565, "Yes": 0.02140177535246059}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.788990563753387e-07}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.569222353643036e-08}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.9576743213595566e-08}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.72530696036762e-06}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959361705941137, "res": {"Yes": 0.9959361705941137, "No": 0.0040631977184213556}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0773945264922446e-07}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.167303246067555e-07}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.437913744840214e-07}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1338882448772965e-07}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0059707424250783365, "res": {"No": 0.9940289008961533, "Yes": 0.0059707424250783365}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999983237218497, "res": {"Yes": 0.999983237218497, "No": 1.646527401504844e-05}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 3.917581730543406e-06}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.5385902236459983e-06}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9989388152551866, "res": {"Yes": 0.9989388152551866, "No": 0.0010603576734405566}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.888581788224125e-07}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.435298226491486e-07}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "YES": 1.7820326499421935e-07}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.1658525838126261e-07}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.5312128952748393e-07, "res": {"No": 0.9999989719621284, "Yes": 4.5312128952748393e-07}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.6641723991242264e-05}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.452410255006655e-08}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.583957798946853e-07}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2972824468240079e-07}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999890781166442, "res": {"Yes": 0.9999890781166442, "No": 1.0336109675092625e-05}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9445818180608936, "res": {"Yes": 0.9445818180608936, "No": 0.055417757727073615}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6845190048954393, "res": {"Yes": 0.6845190048954393, "No": 0.315480344016094}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0050754878294047e-07}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999613043536958, "res": {"Yes": 0.9999613043536958, "No": 3.825160644544193e-05}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.367139441415513e-06}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.30006999688458e-08}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999923042191404, "res": {"Yes": 0.999923042191404, "No": 7.668004905886989e-05}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.489479949339496e-08}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.189099288802164e-08}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.006633440369131564, "res": {"No": 0.9933662932272687, "Yes": 0.006633440369131564}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985467745562164, "res": {"Yes": 0.9985467745562164, "No": 0.0014526308314603527}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.860669283538553e-06}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.459430086521296e-07}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2129975994710166e-07}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.01912932081154562, "res": {"No": 0.9808701108745873, "Yes": 0.01912932081154562}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003612442628672768, "res": {"No": 0.9996380246288984, "Yes": 0.0003612442628672768}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.274330228092856e-08}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980615848580012, "res": {"Yes": 0.9980615848580012, "No": 0.0019379607859620232}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999872900832717, "res": {"Yes": 0.9999872900832717, "No": 1.2669424334285806e-05}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.527358128772404, "res": {"Yes": 0.527358128772404, "No": 0.47264142720438945}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.002238106685971591, "res": {"No": 0.9977614967935605, "Yes": 0.002238106685971591}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.165747661801644e-07}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.39555789502172e-08}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.730822594098376e-08}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9936015021285195, "res": {"Yes": 0.9936015021285195, "No": 0.0063976681920401585}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.337612543151747e-08}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.217837385531057e-07}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.9841244144894245e-08}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.3491985393255075e-08}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999424710798518, "res": {"Yes": 0.9999424710798518, "No": 5.45356468207607e-05}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.3686508381851267e-05, "res": {"No": 0.999985978860297, "Yes": 1.3686508381851267e-05}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.47007556663774e-08}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997831359983979, "res": {"Yes": 0.9997831359983979, "No": 0.00021609552557727763}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.3160453737738375e-07}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999579667834204, "res": {"Yes": 0.9999579667834204, "No": 4.1491913713106315e-05}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.552718629393481e-07}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0397805184801803e-07}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, " Yes": 3.003837539230738e-07}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 9.586373384089e-07}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.80332627547087e-08}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.1005055529274445, "res": {"No": 0.899494365182248, "Yes": 0.1005055529274445}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996687611716981, "res": {"Yes": 0.9996687611716981, "No": 0.0003309040843360533}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9003757902510723e-07}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.7618115297338755e-06}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.841230974681219e-06, "res": {"No": 0.9999936078174301, "Yes": 5.841230974681219e-06}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.019464361517979135, "res": {"No": 0.98053555587466, "Yes": 0.019464361517979135}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.219619280719914e-08}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 2.544630767549743e-06}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.606964493021407e-06}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9976503078584732, "res": {"Yes": 0.9976503078584732, "No": 0.002349292640921539}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.27251588909847e-08}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.1096877294867841e-07}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.5857054938982985e-08}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.952808953899833e-08}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999611851533157, "res": {"Yes": 0.9999611851533157, "No": 3.8752666175608876e-05}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976977652170343, "res": {"Yes": 0.9976977652170343, "No": 0.0023017398166117037}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.6079809004187094e-08}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.8897490771389714e-07}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.073448708995564e-06}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 8.885764999069151e-06}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999728667271139, "res": {"Yes": 0.9999728667271139, "No": 2.673163132323098e-05}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 8.103683763826781e-08}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1945404248446327e-07}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7807162142967695e-07}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.094817483210206e-08}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.36851298519326736, "res": {"No": 0.6314866737171013, "Yes": 0.36851298519326736}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.232693924404568e-07}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.743607848785185e-06}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.833703344270745e-08}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0033211638508490664, "res": {"No": 0.9966784561723577, "Yes": 0.0033211638508490664}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.009351180378123767, "res": {"No": 0.9906486266052696, "Yes": 0.009351180378123767}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999626155448161, "res": {"Yes": 0.9999626155448161, "No": 3.692033775787465e-05}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.412430820012461e-08}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.0509796749703086e-08}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996445784572067, "res": {"Yes": 0.9996445784572067, "No": 0.00035522292777974136}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.2858374702581704e-05, "res": {"No": 0.9999768003491161, "Yes": 2.2858374702581704e-05}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.982144149155266, "res": {"Yes": 0.982144149155266, "No": 0.01785575881676634}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950378626828016, "res": {"Yes": 0.9950378626828016, "No": 0.004961489405273719}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.1114578877485293e-07}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.3848269014947418e-07}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.2980332558206055e-07}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.615903627015693e-07}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.005701475864522e-06}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1437194775521653e-07}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.19967287031348627, "res": {"No": 0.8003269798555566, "Yes": 0.19967287031348627}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.01580377094532693, "res": {"No": 0.9841960204448107, "Yes": 0.01580377094532693}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.049364714583271e-06}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999788267671561, "res": {"Yes": 0.9999788267671561, "No": 2.067157478885954e-05}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.4132776171406834e-06}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 8.069227428663921e-08}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 1.807590333710108e-07}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.811529236862613e-08}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.916586330916567e-07}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "\"Yes": 7.703669270192455e-08}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.771091786080668e-06}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.975426751363364e-08}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.9982095601826755e-07}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.424508227939314e-07}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.011258535601648e-08}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9445718197152657, "res": {"Yes": 0.9445718197152657, "No": 0.05542753982455717}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003680984972640072, "res": {"No": 0.9996317201316852, "Yes": 0.0003680984972640072}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999667875255465, "res": {"Yes": 0.9999667875255465, "No": 3.2702108565976385e-05}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 9.344152475388564e-08}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6923424781539652e-07}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9965081989659645, "res": {"Yes": 0.9965081989659645, "No": 0.0034915099007844165}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.3181492256563317e-07}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.967480934008576e-08}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.4459914896452755e-06}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "YES": 3.123820412571779e-08}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00016541369161362908, "res": {"No": 0.9998311584553382, "Yes": 0.00016541369161362908}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8671418893980132, "res": {"Yes": 0.8671418893980132, "No": 0.1328568029944116}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 9.059098854803877e-07}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 2.99530376647941e-07}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 6.269512233786456e-06}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003663480599489821, "res": {"No": 0.9963362183863762, "Yes": 0.003663480599489821}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959697559207469, "res": {"Yes": 0.9959697559207469, "No": 0.004029807382788185}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.5107446137307963e-07}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 7.550122165208666e-08}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.1391046487227698e-06}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.6092444794486066e-05, "res": {"No": 0.9999636883392843, "Yes": 3.6092444794486066e-05}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989548760228864, "res": {"Yes": 0.9989548760228864, "No": 0.0010444331300833445}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.0109466029753554e-06}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707837403328696, "res": {"Yes": 0.9707837403328696, "No": 0.02921576845154239}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.221527939450382e-08}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.366245812005779e-06}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934732334739024, "res": {"Yes": 0.9934732334739024, "No": 0.006525873271603122}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.6619133481235182e-06}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0764253076323093e-07}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.710852560742139e-07}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.7597917267436924e-05, "res": {"No": 0.9999821644040562, "Yes": 1.7597917267436924e-05}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "\"Yes": 1.6957254628687942e-07}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.159837947832548e-08}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "\"Yes": 3.9433895721195157e-07}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.924713947170018e-08}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.1515230586632768e-05}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999448550185404, "res": {"Yes": 0.9999448550185404, "No": 5.463832491090008e-05}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 4.809699258604368e-06}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994993973733282, "res": {"Yes": 0.9994993973733282, "No": 0.0005000185301143979}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.446955627722395e-07}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.004285727341628293, "res": {"No": 0.9957140460288402, "Yes": 0.004285727341628293}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.4051827820432303, "res": {"No": 0.5948166691443926, "Yes": 0.4051827820432303}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9844688301433995, "res": {"Yes": 0.9844688301433995, "No": 0.015530211858933605}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.1579229177250447e-07}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.5060658133748046e-06}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999268564151225, "res": {"Yes": 0.9999268564151225, "No": 7.22708999600199e-05}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999558212119114, "res": {"Yes": 0.9999558212119114, "No": 4.349254560311516e-05}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999696483206215, "res": {"Yes": 0.9999696483206215, "No": 2.994931814817611e-05}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997049803968442, "res": {"Yes": 0.9997049803968442, "No": 0.00029210127022022777}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 3.8028035003327372e-06}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999626155448161, "res": {"Yes": 0.9999626155448161, "No": 3.691048134358313e-05}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.269595506577795e-08}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.2146251086940057e-08}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.671474422213044e-08}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.579503308745065e-08}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.5285790187907715e-06}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.20315990645645e-07}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997446533065177, "res": {"Yes": 0.9997446533065177, "No": 0.00025512766271123714}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.950236890447971e-08}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.847362728485669e-08}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998120929467422, "res": {"Yes": 0.9998120929467422, "No": 0.00018723706813148164}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999934246531854, "res": {"Yes": 0.999934246531854, "No": 6.496317416026834e-05}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.1442148153188318e-07}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 4.46255538379981e-06}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.03984057554338428, "res": {"No": 0.960158938775042, "Yes": 0.03984057554338428}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.7311460588786676e-07}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.492127244708923e-08}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999656848489952, "res": {"Yes": 0.999656848489952, "No": 0.00034219845890788515}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 1.4917555545985486e-07}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 8.104906005049664e-06, "res": {"No": 0.9999917005724405, "Yes": 8.104906005049664e-06}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999429478675348, "res": {"Yes": 0.9999429478675348, "No": 5.701101860225348e-05}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.0144642604789118e-07}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00043057006272918135, "res": {"No": 0.9995691852073547, "Yes": 0.00043057006272918135}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.243388333902481e-05, "res": {"No": 0.9999173208681078, "Yes": 8.243388333902481e-05}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 8.528203621469148e-08}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.4656940784935305e-07}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9771606632134691, "res": {"Yes": 0.9771606632134691, "No": 0.022838864747612993}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.159418956268913e-08}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.029699127818059116, "res": {"No": 0.9703006820734817, "Yes": 0.029699127818059116}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 7.776921439098465e-06}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999640459343629, "res": {"Yes": 0.9999640459343629, "No": 3.587280752635446e-05}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.339979645558654e-06}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.3352230475784254e-06}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.098103245794226e-06, "res": {"No": 0.9999938462231346, "Yes": 5.098103245794226e-06}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997018820159999, "res": {"Yes": 0.9997018820159999, "No": 0.00029754800043305275}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 4.6798546790587454e-06}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999725091278809, "res": {"Yes": 0.9999725091278809, "No": 2.7054126864545848e-05}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.4019764037778687e-06}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9943568159213466, "res": {"Yes": 0.9943568159213466, "No": 0.00564291219557566}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0036526366584312057, "res": {"No": 0.9963471158733603, "Yes": 0.0036526366584312057}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4707737913325328e-07}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1027261863348384e-05}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.7449321126259345e-07}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8396489425799823, "res": {"Yes": 0.8396489425799823, "No": 0.16035008864839526}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.917688376743687e-07}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35647342003998067, "res": {"No": 0.643525613061944, "Yes": 0.35647342003998067}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999748931371826, "res": {"Yes": 0.9999748931371826, "No": 2.478063640226593e-05}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9902879564951017, "res": {"Yes": 0.9902879564951017, "No": 0.009711072010924287}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9992390209915011, "res": {"Yes": 0.9992390209915011, "No": 0.0007606237786064566}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 8.487085464272323e-07}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.170343493395385e-08}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.123489786883069e-08}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0197379178112862e-07}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.571468428924779e-07}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 8.363362490088892e-06}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 9.543393652318862e-07}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 3.931773714695378e-07}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999572515937392, "res": {"Yes": 0.9999572515937392, "No": 4.214063840848385e-05}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.998497577668166, "res": {"Yes": 0.998497577668166, "No": 0.0015020396918360096}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998143573937215, "res": {"Yes": 0.9998143573937215, "No": 0.00018495833027748582}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.780939427337648e-07}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987270066443327, "res": {"Yes": 0.9987270066443327, "No": 0.0012708257410376732}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.6771493631682633e-06}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.704852566986649e-05, "res": {"No": 0.999962734742367, "Yes": 3.704852566986649e-05}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.955377597113167, "res": {"Yes": 0.955377597113167, "No": 0.04462228415526967}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.810480964999924e-08}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.235653283972049e-07}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.278175971842947e-08}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.1123373204194925e-06, "res": {"No": 0.999995276659155, "Yes": 4.1123373204194925e-06}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0937647572683727e-07}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.2747770735750485e-06}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.6942100548476485e-08}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999856212553752, "res": {"Yes": 0.9999856212553752, "No": 1.4162697307803144e-05}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998859733865285, "res": {"Yes": 0.9998859733865285, "No": 0.00011355003286281767}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.823810333218121e-08}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 8.877416304471229e-06}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "McC": 6.487197705543347e-07}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999102884643205, "res": {"Yes": 0.9999102884643205, "No": 8.931143739068479e-05}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.051724589064638546, "res": {"No": 0.9482743309023942, "Yes": 0.051724589064638546}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, " Yes": 4.974538741114332e-07}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999603507610518, "res": {"Yes": 0.9999603507610518, "No": 3.81762104285537e-05}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 1.0148010241582765e-06}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.04871613449323696, "res": {"No": 0.9512834463536674, "Yes": 0.04871613449323696}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.251521090553898e-06}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2296911717301118e-06}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990082927669691, "res": {"Yes": 0.9990082927669691, "No": 0.0009905670510227016}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 8.583739442041815e-07}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0006874209615848619, "res": {"No": 0.9993119885884663, "Yes": 0.0006874209615848619}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, " Yes": 3.687990209346523e-07}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 3.854701445469853e-06}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 1.3839788260259866e-06}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999835948245663, "res": {"Yes": 0.9999835948245663, "No": 1.4570975315764198e-05}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.026870318640472e-07}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.2569233725062915, "res": {"No": 0.7430751465583146, "Yes": 0.2569233725062915}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.683380741058236e-07}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.5820652364479385e-06}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7359913802956012e-07}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2873729266771403e-07}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3914350866121817e-07}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999710787232282, "res": {"Yes": 0.9999710787232282, "No": 2.8320914443172775e-05}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998292515191815, "res": {"Yes": 0.9998292515191815, "No": 0.0001704471977194184}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.6443800467049245e-07}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999381800049346, "res": {"Yes": 0.9999381800049346, "No": 6.132972023319506e-05}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.327221101162766e-06}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 7.715441041685746e-07}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.362670745397531e-07}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "YES": 3.4866875204462856e-07}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0002730111284655651, "res": {"No": 0.9997265427962388, "Yes": 0.0002730111284655651}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.7788152591630445e-06}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.23126322056141e-07}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.1248623256394683e-07}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.9756992753372576e-07}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.4310675953843e-06}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 7.153206687677326e-08}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.444825695094695e-08}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.024930640469918e-07}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.31473845079354923, "res": {"No": 0.6852605829451507, "Yes": 0.31473845079354923}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973397788760245, "res": {"Yes": 0.9973397788760245, "No": 0.0026594219282839197}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.843889217047938e-06}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997744360533731, "res": {"Yes": 0.9997744360533731, "No": 0.00022489341030141886}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.0071796644515974e-07}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999983475621529, "res": {"Yes": 0.999983475621529, "No": 1.5892456424983688e-05}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.040831954727945756, "res": {"No": 0.9591676805583647, "Yes": 0.040831954727945756}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987635123815563, "res": {"Yes": 0.9987635123815563, "No": 0.0012355666354446385}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.458880923347883e-07}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980104188457163, "res": {"Yes": 0.9980104188457163, "No": 0.0019888016650396246}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9941640291243016, "res": {"Yes": 0.9941640291243016, "No": 0.005834669825618346}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.089223354317877e-05, "res": {"No": 0.9999087389545426, "Yes": 9.089223354317877e-05}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.1615085692649986e-07}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8657599264863406, "res": {"Yes": 0.8657599264863406, "No": 0.13423885040889988}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999958085982417, "res": {"Yes": 0.999958085982417, "No": 4.1445676011625076e-05}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9540166737130007, "res": {"Yes": 0.9540166737130007, "No": 0.045981759212226934}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999776347571058, "res": {"Yes": 0.9999776347571058, "No": 2.1853685016540737e-05}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.2527055483337195e-07}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995154636256819, "res": {"Yes": 0.9995154636256819, "No": 0.00048283865873576285}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 2.766194525020122e-06}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00501815722161116, "res": {"No": 0.9949814482223777, "Yes": 0.00501815722161116}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999232805730858, "res": {"Yes": 0.9999232805730858, "No": 7.603057469195227e-05}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942877599401809, "res": {"Yes": 0.9942877599401809, "No": 0.005711145285686912}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.273109349105874e-08}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998296090781823, "res": {"Yes": 0.9998296090781823, "No": 0.00016996347447980795}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3820717630601026e-07}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999840716318578, "res": {"Yes": 0.9999840716318578, "No": 1.5275520167005093e-05}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999206582977194, "res": {"Yes": 0.9999206582977194, "No": 7.767675568341151e-05}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.1522281427260367e-07}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "\"Yes": 1.3594329319018393e-07}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.704407425013861e-06, "res": {"No": 0.9999921773835968, "Yes": 7.704407425013861e-06}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0007621470819286697, "res": {"No": 0.9992374798163856, "Yes": 0.0007621470819286697}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.25711440304055e-07}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.240978182454101e-08}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.322392127597753e-08}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9985467745562164, "res": {"Yes": 0.9985467745562164, "No": 0.0014529853166286614}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999670259236568, "res": {"Yes": 0.9999670259236568, "No": 3.22709213829796e-05}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.2377863194658171e-06}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 4.772825883851976e-06}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999653571300781, "res": {"Yes": 0.9999653571300781, "No": 3.403891939213225e-05}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999769195503577, "res": {"Yes": 0.9999769195503577, "No": 2.2771950969100286e-05}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0877198788135273e-07}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.90135947554092e-08}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.233957705961111e-07}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.2947367956822142e-07}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1024242857879072e-07}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999042096381897, "res": {"Yes": 0.9999042096381897, "No": 9.502020545298859e-05}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3103701452893953e-07}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "1": 2.4132602906309045e-07}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 4.907048093038559e-06}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9495969543401543, "res": {"Yes": 0.9495969543401543, "No": 0.05040248497806169}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.963134079598622e-07}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994010368500871, "res": {"Yes": 0.9994010368500871, "No": 0.0005973520021419725}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.579433523372374e-07}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.0476886125384074e-07}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.405527278753274e-07}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.021704240994425947, "res": {"No": 0.9782952626120673, "Yes": 0.021704240994425947}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "\"Yes": 1.3780475120085868e-07}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994485498045803, "res": {"Yes": 0.9994485498045803, "No": 0.0005508910498679233}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999312666422455, "res": {"Yes": 0.9999312666422455, "No": 6.788041526383929e-05}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0003639795038562931, "res": {"No": 0.9996357605513003, "Yes": 0.0003639795038562931}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.2069058350536687e-06}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.790537212056221e-06}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.548153107950349e-08}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.571223990070334e-08}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999293595151566, "res": {"Yes": 0.9999293595151566, "No": 7.021307575228366e-05}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.06233077465211241, "res": {"No": 0.9376686162280856, "Yes": 0.06233077465211241}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 9.8402503317427e-07}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.4280681087231252e-06}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.005979485012689e-08}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.712838898409434e-05, "res": {"No": 0.999952483661937, "Yes": 4.712838898409434e-05}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999983237218497, "res": {"Yes": 0.999983237218497, "No": 1.626691935134129e-05}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9710751415000668, "res": {"Yes": 0.9710751415000668, "No": 0.028923536602746765}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999647611309035, "res": {"Yes": 0.9999647611309035, "No": 3.397012826388895e-05}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999143410289392, "res": {"Yes": 0.9999143410289392, "No": 8.424696610528196e-05}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.1469573470873915e-07}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.2788992879753203e-06, "res": {"No": 0.9999967070975216, "Yes": 2.2788992879753203e-06}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8894898169522104, "res": {"Yes": 0.8894898169522104, "No": 0.11050973084012139}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5250307258645e-06}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 8.547533347461456e-08}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.876578183635989e-07}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997666895911075, "res": {"Yes": 0.9997666895911075, "No": 0.00023303730731132026}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.8516898432620512e-07}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.170693901201355e-07}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.140125134753171e-07}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997067679209325, "res": {"Yes": 0.9997067679209325, "No": 0.00029205307748665155}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8108838932789092, "res": {"Yes": 0.8108838932789092, "No": 0.18911446733077417}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.3960386552213037e-07}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9052676311252081, "res": {"Yes": 0.9052676311252081, "No": 0.09473122572837024}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.254666312729386e-07}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.6891456957888334e-06}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 3.406153190760095e-06}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 6.498553742289635e-07}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.1197400536650885e-05}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999173208681078, "res": {"Yes": 0.9999173208681078, "No": 8.21073516867694e-05}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.37024349224795e-08}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.0568603278164826e-06}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2825786875023488e-07}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.960906735516764e-08}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997450108152697, "res": {"Yes": 0.9997450108152697, "No": 0.00025492835833115833}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9287967163248498, "res": {"Yes": 0.9287967163248498, "No": 0.07120186977672212}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.714982888963042e-07}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7846417380692877, "res": {"Yes": 0.7846417380692877, "No": 0.21535770266500412}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.01848119038630134, "res": {"No": 0.9815184408559426, "Yes": 0.01848119038630134}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00016971279343959403, "res": {"No": 0.9998297282578789, "Yes": 0.00016971279343959403}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.945637356240305e-08}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.7065071017744196e-07}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.318407608225226e-07}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999085005663285, "res": {"Yes": 0.9999085005663285, "No": 9.119650353964582e-05}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999934127339699, "res": {"Yes": 0.999934127339699, "No": 6.567225631295136e-05}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.742687624831381e-07}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.6054167701722095e-07}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.293017868417193e-08}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.120405321341632e-07}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3432074171017574, "res": {"No": 0.6567921058657262, "Yes": 0.3432074171017574}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.003497820215765552, "res": {"No": 0.9965019249700944, "Yes": 0.003497820215765552}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.767106832051595e-08}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.34114328850794e-08}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.553411900317249e-05}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.318036942594633e-05, "res": {"No": 0.9999865748701179, "Yes": 1.318036942594633e-05}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994298526657966, "res": {"Yes": 0.9994298526657966, "No": 0.000569580917504417}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.000721691745634e-08}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "YES": 4.975927293784244e-08}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.086356423868509e-08}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 7.57074560991507e-06}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9635248693425058, "res": {"Yes": 0.9635248693425058, "No": 0.036474288181756416}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999728667271139, "res": {"Yes": 0.9999728667271139, "No": 2.6525367996191307e-05}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.204344105254079e-06}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.174547811700847e-07}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999804955832136, "res": {"Yes": 0.9999804955832136, "No": 1.9108830498285357e-05}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0901209473515364e-07}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2758333308871551e-06}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.475963984022922e-08}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 6.256017145901234e-08}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.01621757416080335, "res": {"No": 0.9837817817873254, "Yes": 0.01621757416080335}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6782568256607002e-07}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.867303465450529e-08}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991941493721612, "res": {"Yes": 0.9991941493721612, "No": 0.0008031570371994042}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.3698217898238856e-07}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.3794923345385694e-06}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6718564011893478, "res": {"Yes": 0.6718564011893478, "No": 0.32814312797513534}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.398507576112392e-07}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.223869447879084e-08}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.012395210851182e-08}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0464397108356373e-07}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.3893698042852905e-07}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 1.6934647157676988e-06}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 6.923320931884575e-06}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.699371171865564e-08}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9982632658711336, "res": {"Yes": 0.9982632658711336, "No": 0.0017361764954949032}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999211350800014, "res": {"Yes": 0.9999211350800014, "No": 7.834377840789601e-05}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.808177559844036e-08}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.614533336324532e-08}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999572515937392, "res": {"Yes": 0.9999572515937392, "No": 4.194178385058709e-05}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.555277989591316e-05, "res": {"No": 0.9999440206399028, "Yes": 5.555277989591316e-05}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.0893241719039145e-07}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.3472096910619447e-06}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994316397279254, "res": {"Yes": 0.9994316397279254, "No": 0.000567426348165237}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0940623017465631e-07}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995350000659958, "res": {"Yes": 0.9995350000659958, "No": 0.0004645125415347802}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988946898058092, "res": {"Yes": 0.9988946898058092, "No": 0.0011047844841532363}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9687346379890529, "res": {"Yes": 0.9687346379890529, "No": 0.03126383127245415}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.035580615536345e-06}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.7589517193131326e-05, "res": {"No": 0.9999421134904887, "Yes": 5.7589517193131326e-05}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.9092728542103408e-05, "res": {"No": 0.9999806147848957, "Yes": 1.9092728542103408e-05}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 8.250681012853892e-08}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.052532602505414e-07}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.988052988506419e-07}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.009615024998468997, "res": {"No": 0.9903837071149252, "Yes": 0.009615024998468997}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.754118702605707e-06}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.96675090412001e-08}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1023129465577676e-07}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966206661452248, "res": {"Yes": 0.9966206661452248, "No": 0.0033788833293679522}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.440200356939476e-07}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.04477197022462035, "res": {"No": 0.9552276881568845, "Yes": 0.04477197022462035}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.7085611636271373e-06}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998944359222794, "res": {"Yes": 0.9998944359222794, "No": 0.00010524885124480675}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.036069195554638e-07}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 8.977985793264746e-06}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998217430096656, "res": {"Yes": 0.9998217430096656, "No": 0.00017819359256488905}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 9.462520109620357e-08}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.329818423883387e-08}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996912797639023, "res": {"Yes": 0.9996912797639023, "No": 0.00029871518412602006}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.24855187567510106, "res": {"No": 0.7514475995734822, "Yes": 0.24855187567510106}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.686648025202657e-07}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.2920592537924673e-07}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.5161146458529375e-08}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0331874660362628e-07}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999890781166442, "res": {"Yes": 0.9999890781166442, "No": 1.0271885067406207e-05}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.6001197416396088e-05, "res": {"No": 0.9999827604126034, "Yes": 1.6001197416396088e-05}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 1.981037223830732e-06}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.222634126110282e-07}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0295611576915268e-07}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.44374185202273303, "res": {"No": 0.5562564778480322, "Yes": 0.44374185202273303}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00014666815089978805, "res": {"No": 0.9998524888808352, "Yes": 0.00014666815089978805}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.6706312649444217e-06}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 6.953078825546737e-06}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.2746669610928312e-07}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.40440626494693355, "res": {"No": 0.5955934711711958, "Yes": 0.40440626494693355}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.302643713880258e-07}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.70438574475013e-08}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.695667958716621e-08}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 9.390594074738923e-06, "res": {"No": 0.9999905085465441, "Yes": 9.390594074738923e-06}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9948789037540199, "res": {"Yes": 0.9948789037540199, "No": 0.005120822030427877}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.009350405197402099, "res": {"No": 0.9906494458720226, "Yes": 0.009350405197402099}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.281402136647282e-06}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23200139370145811, "res": {"No": 0.7679984454424423, "Yes": 0.23200139370145811}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999740587314805, "res": {"Yes": 0.9999740587314805, "No": 2.5735350638149358e-05}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003198654584278878, "res": {"No": 0.9968010348274089, "Yes": 0.003198654584278878}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930464321141415, "res": {"Yes": 0.9930464321141415, "No": 0.006952513709343431}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.796586459951431e-08}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.2239427873795575e-07}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2063575833509922e-06}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.1554132341824365e-05, "res": {"No": 0.9999677411203288, "Yes": 3.1554132341824365e-05}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.9994710107664905e-07}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.6986151768012614e-08}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.716281485164882e-07}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.3794316364700922e-07}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9978609513322708, "res": {"Yes": 0.9978609513322708, "No": 0.002138478466049219}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.7847896371744566e-06}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2811878442952934e-07}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.4136947270042165e-07}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.004947630519183744, "res": {"No": 0.9950521455587905, "Yes": 0.004947630519183744}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.821575061648147e-07, "res": {"No": 0.9999989719621284, "Yes": 4.821575061648147e-07}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996966386233704, "res": {"Yes": 0.9996966386233704, "No": 0.00030285906278172594}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.796179885924367e-08}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.617342875233836e-08}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 4.2449950384257154e-07}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 7.903562845902297e-07, "res": {"No": 0.9999988527586581, "Yes": 7.903562845902297e-07}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.7468021826916425e-06}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996263579215942, "res": {"Yes": 0.9996263579215942, "No": 0.00037330028820403786}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00937359111017879, "res": {"No": 0.9906127112903709, "Yes": 0.00937359111017879}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 6.053360412733818e-06}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 6.629886891537795e-08}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.4032369163237875e-07}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.3510631408175976e-07}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.103084113415586e-08}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.5479948758230496e-05, "res": {"No": 0.9999841908319662, "Yes": 1.5479948758230496e-05}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.770944730709547e-08}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.398212458669666e-07}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 9.032287439943922e-07}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.688452805434308e-08}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.112827700943438e-05, "res": {"No": 0.9999385375988907, "Yes": 6.112827700943438e-05}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.716299392006659e-05, "res": {"No": 0.9999522452652937, "Yes": 4.716299392006659e-05}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.986182285442137e-08}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.426484683820368e-08}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 8.245648098570675e-06}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8178346027100278, "res": {"Yes": 0.8178346027100278, "No": 0.18216434588424724}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.518538943122245e-07}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.8038653380037034e-08}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.568089200258007e-08}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.181321940307972e-08}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.891543506713311e-08}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.4797610927945269e-07}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.050829779469954e-08}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.2907556560730176e-08}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998165026676953, "res": {"Yes": 0.9998165026676953, "No": 0.00018318133391968108}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 6.474870782127736e-07}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9892541773312198, "res": {"Yes": 0.9892541773312198, "No": 0.010744713637613893}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.92206226585185e-08}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.673674738067249e-07}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904940964826109, "res": {"Yes": 0.9904940964826109, "No": 0.009505513312771881}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.06833581992688376, "res": {"No": 0.9316632307432114, "Yes": 0.06833581992688376}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.881257336309586, "res": {"Yes": 0.881257336309586, "No": 0.11874132397495823}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997887336898705, "res": {"Yes": 0.9997887336898705, "No": 0.00021041579681497363}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994471201644137, "res": {"Yes": 0.9994471201644137, "No": 0.0005461207242362751}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.304103470642568e-07}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.021454488530453314, "res": {"No": 0.9785451844020797, "Yes": 0.021454488530453314}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987971580949502, "res": {"Yes": 0.9987971580949502, "No": 0.0012019175971533678}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.026003415103768e-08}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 4.785941393234e-08}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999766811478886, "res": {"Yes": 0.9999766811478886, "The": 2.1319679321184293e-05}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.009205857617303801, "res": {"No": 0.9907924356344324, "Yes": 0.009205857617303801}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9804961263911459, "res": {"Yes": 0.9804961263911459, "No": 0.019502599784831744}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 2.5980614167077553e-06}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3679778820236954e-07}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.44642271026291e-08}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9960583328440751, "res": {"Yes": 0.9960583328440751, "No": 0.003941434640477497}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.07694267614730574, "res": {"No": 0.9230572356009079, "Yes": 0.07694267614730574}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.475262811131316e-07}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.3650443774470033e-07}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.621603220446287e-08}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.004191687003276127, "res": {"No": 0.9958081315110037, "Yes": 0.004191687003276127}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996968769710715, "res": {"Yes": 0.9996968769710715, "No": 0.0003009140145417341}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.2576231592586403e-06}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6192331143505712e-07}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.53765636803395e-05}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 8.564833148421067e-07}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.665865181660969e-07}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.3105942532805737e-08}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.889420967004399e-08}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.11050292365036524, "res": {"No": 0.8894963191465378, "Yes": 0.11050292365036524}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0003928214009098723, "res": {"No": 0.9996069388687747, "Yes": 0.0003928214009098723}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996377862852326, "res": {"Yes": 0.9996377862852326, "No": 0.00036191578909851367}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.2393259597274173e-07}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.661391729574238e-06}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.465988163301129e-08}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.2207148019006263, "res": {"No": 0.7792846854502324, "Yes": 0.2207148019006263}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.636452495307663e-06}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999944422379444, "res": {"Yes": 0.9999944422379444, "No": 5.218294153471952e-06}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 3.993889459474503e-06}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.179982360508326e-07}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9886099286676896, "res": {"Yes": 0.9886099286676896, "No": 0.011388588883766874}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999056399421903, "res": {"Yes": 0.9999056399421903, "No": 9.320427203852479e-05}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999806147848957, "res": {"Yes": 0.9999806147848957, "No": 1.9075037783678275e-05}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.363490084193395e-07}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.8172683462992132e-05, "res": {"No": 0.9999815683978641, "Yes": 1.8172683462992132e-05}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.25901570035204474, "res": {"No": 0.7409839454352855, "Yes": 0.25901570035204474}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.3725753487596584e-07}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.080739741477775e-08}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.184107804553905e-08}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8879867431380468, "res": {"Yes": 0.8879867431380468, "No": 0.11201287040038858}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9694649069644453, "res": {"Yes": 0.9694649069644453, "No": 0.030534420216299835}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999238765575363, "res": {"Yes": 0.9999238765575363, "No": 7.557817552406627e-05}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.925307170644269e-08}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.6843339958618356e-07}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999716747231683, "res": {"Yes": 0.9999716747231683, "No": 2.7910404988650033e-05}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999726900318509, "res": {"Yes": 0.999726900318509, "No": 0.00027169647920704897}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.63777052087574e-08}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.208315649009794e-08}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.920121875282528e-08}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.285713690381198e-07}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942581713709981, "res": {"Yes": 0.9942581713709981, "No": 0.005740637569461133}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.285636039627462e-08}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.256261379331209e-07}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.912018365578319e-08}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.7669990416161421e-07}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5557047384319878e-07}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.5631211473932684e-06}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1814608142097277e-07}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.695996470585257e-08}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.2161390912122096e-06, "res": {"No": 0.9999915813694369, "Yes": 7.2161390912122096e-06}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 6.829180294668063e-07, "res": {"No": 0.999999091165773, "Yes": 6.829180294668063e-07}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0012024376591872037, "res": {"No": 0.9987968009251502, "Yes": 0.0012024376591872037}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974300253537994, "res": {"Yes": 0.9974300253537994, "No": 0.0025689973304363163}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 1.9658595072665925e-06}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9376869665824629, "res": {"Yes": 0.9376869665824629, "No": 0.06230631462365607}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00046932812841535157, "res": {"No": 0.9995301149705649, "Yes": 0.00046932812841535157}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999738203326934, "res": {"Yes": 0.9999738203326934, "No": 2.479570761038154e-05}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999643744344122, "res": {"Yes": 0.999643744344122, "No": 0.0003421477146392099}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999589203757908, "res": {"Yes": 0.9999589203757908, "No": 3.996977557871446e-05}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.7493825415805475e-05, "res": {"No": 0.9999413983091462, "Yes": 5.7493825415805475e-05}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999709595226828, "res": {"Yes": 0.9999709595226828, "No": 2.4207065611462952e-05}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7035931717998689e-07}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 9.698841689935029e-05, "res": {"No": 0.9999025409494419, "Yes": 9.698841689935029e-05}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999855020530962, "res": {"Yes": 0.9999855020530962, "No": 1.3833325301643274e-05}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997931433077965, "res": {"Yes": 0.9997931433077965, "No": 0.0002064186850729715}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.213230322417117e-07}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.5351308949065298e-07}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.220867534600172e-08}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.7483775175686655e-08}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.24733061873699266, "res": {"No": 0.7526691677374767, "Yes": 0.24733061873699266}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.03914157803974677, "res": {"No": 0.9608580852059829, "Yes": 0.03914157803974677}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.4975561677211518e-06}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.4906234644716964e-05}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 9.670670555486832e-07}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9623139752184943, "res": {"Yes": 0.9623139752184943, "No": 0.03768517115385209}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 3.599933830837501e-05, "res": {"No": 0.99996356913662, "Yes": 3.599933830837501e-05}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 7.482969094226939e-07}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.5772512351607473e-07}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.63258281775661e-08}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "\"Yes": 2.2535495658622908e-06}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.637933746054187e-08}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1452885923382949e-07}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "\"Yes": 5.088645326116345e-07}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.6276278546802612e-07}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.5026409591767614e-06, "res": {"No": 0.9999968263007362, "Yes": 2.5026409591767614e-06}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.782381977104418e-08}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.2365913102147835e-07}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.4921180952452224e-07}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.507632058504256e-07}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.695186307901785e-07}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 8.204015618316126e-08}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0096791942656667e-07}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.2484581914805e-08}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.6340549924118767e-07}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997639485445299, "res": {"Yes": 0.9997639485445299, "No": 0.00023561274776248353}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.988974258786802e-08}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 7.511211296779417e-08}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.4979733011130275e-08}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3844350057828004, "res": {"No": 0.6155639743060952, "Yes": 0.3844350057828004}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 1.6402465369789706e-06}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.6713499462859331e-07}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.077209646777762e-07}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999430670627412, "res": {"Yes": 0.9999430670627412, "No": 5.627602259196574e-05}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998359258216405, "res": {"Yes": 0.9998359258216405, "No": 0.0001634433681171341}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.3244609222603485e-06}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.112795870775445e-07}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0724402410407381e-07}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.736387108918013e-08}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.10060694378407153, "res": {"No": 0.899392620134467, "Yes": 0.10060694378407153}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999202959705496, "res": {"Yes": 0.999202959705496, "No": 0.0007963897948159853}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "YES": 9.762970844882845e-08}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1803578449760709e-07}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.075197806778709e-07}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0005965207683598277, "res": {"No": 0.9994030621083583, "Yes": 0.0005965207683598277}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999073086401096, "res": {"Yes": 0.9999073086401096, "No": 9.212400574213631e-05}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9722584721711841, "res": {"Yes": 0.9722584721711841, "No": 0.0277403901407513}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.4590268187348654e-07}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999701251202631, "res": {"Yes": 0.9999701251202631, "No": 2.932019588598589e-05}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9986285682731837, "res": {"Yes": 0.9986285682731837, "No": 0.0013712027562803545}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994221125413146, "res": {"Yes": 0.9994221125413146, "No": 0.0005776816035387017}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.531744919261393e-08}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0853848786502581e-07}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.83813090115919e-08}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.6022156109934986, "res": {"Yes": 0.6022156109934986, "No": 0.3977836955314106}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5671170398154672e-07}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.0546748896467735e-06}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.645363693071852e-06}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.4801303901982535e-07}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0425490414472726e-06, "res": {"No": 0.9999974223173222, "Yes": 2.0425490414472726e-06}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.585052508893724e-06, "res": {"No": 0.9999946806438478, "Yes": 4.585052508893724e-06}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1154787120768391e-07}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.8082086353277865e-07}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0855455275000193e-07}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.110969920938512e-07}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.442984071368803e-06}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.3113774625709914e-07}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.8330709056620597e-07}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.8260746279273684e-07}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0006199673144151856, "res": {"No": 0.9993798498726276, "Yes": 0.0006199673144151856}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9709125078027685, "res": {"Yes": 0.9709125078027685, "No": 0.029086861546670974}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999903893441826, "res": {"Yes": 0.9999903893441826, "No": 8.957745329285106e-06}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.1403517332574225e-08}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.911657649383958e-06}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.1425934685172662e-07}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999965953125608, "res": {"Yes": 0.999965953125608, "No": 3.3594069820816274e-05}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99998752848777, "res": {"Yes": 0.99998752848777, "No": 1.1953995308799592e-05}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.46712821180844e-08}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.1034533276199143e-07}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999896741293122, "res": {"Yes": 0.9999896741293122, "No": 1.0044392895507128e-05}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998667913030664, "res": {"Yes": 0.9998667913030664, "No": 0.0001325636036721638}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.201908151711034e-08}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2819157657009612e-07}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.5588315870355482e-06}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9509613202279735, "res": {"Yes": 0.9509613202279735, "No": 0.04903835656738707}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9473534444796945, "res": {"Yes": 0.9473534444796945, "No": 0.0526461272296148}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.590035556032e-08}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.496209737097087e-08}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999454510038506, "res": {"Yes": 0.9999454510038506, "No": 5.424541962411951e-05}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9982678920334795, "res": {"Yes": 0.9982678920334795, "No": 0.0017314414661112932}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967762743978719, "res": {"Yes": 0.9967762743978719, "No": 0.0032232297312147808}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999855020530962, "res": {"Yes": 0.9999855020530962, "No": 1.2831493233779332e-05}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.29221855070152e-08}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9407832931816098e-07}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9980649126004851, "res": {"Yes": 0.9980649126004851, "No": 0.0019346876243050736}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 2.880643071575922e-06}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.1159262310980677e-07}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.771925559231193e-08}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.2416687315664946e-06}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.2108463258506482e-06, "res": {"No": 0.9999981375378344, "Yes": 1.2108463258506482e-06}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9847330300785928, "res": {"Yes": 0.9847330300785928, "No": 0.015264342898563488}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999776347571058, "res": {"Yes": 0.9999776347571058, "No": 2.2035164374448682e-05}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3199523678146767e-07}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.02222631068757e-06}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.410511419865017e-07}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.33440053460919e-06}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.9613318228744037e-08}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.712967461611084e-08}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.2595433694471356e-08}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.1646803258579253e-07}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.7826695701179472e-08}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 1.0300470336843794e-05}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.977792498661405e-08}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.0081975088316038e-06}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.5433072072797942e-07}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.775128792924446e-06}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2069067131785804e-07}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.5433488771369302e-07}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 4.754196258775542e-07}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9952436325120967, "res": {"Yes": 0.9952436325120967, "No": 0.0047550664895575035}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.1934149109340122, "res": {"No": 0.8065843493233175, "Yes": 0.1934149109340122}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.7703947741579375e-06}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2749112367661146e-06}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995803782358496, "res": {"Yes": 0.9995803782358496, "No": 0.00041922474399240907}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9986834858660825, "res": {"Yes": 0.9986834858660825, "No": 0.0013159361045911786}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.2374905238844441e-06}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 8.671369562056953e-08}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.323964508594902e-07}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998879996225106, "res": {"Yes": 0.9998879996225106, "No": 0.00011141439720473818}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.75172308359828e-08}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8737104072951296, "res": {"Yes": 0.8737104072951296, "No": 0.12628932173915666}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.039450807328077e-08}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 8.024382700106916e-08}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "YES": 7.040672871582135e-08}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9659414745084186, "res": {"Yes": 0.9659414745084186, "No": 0.03405770456099594}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 7.5792295951430445e-06}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.623112373990196e-08}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.70123266722819e-08}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 4.752832442214832e-08}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.159582905466158e-08}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3043199694539155, "res": {"No": 0.6956792448323589, "Yes": 0.3043199694539155}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.2768237617894126e-06}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.609216276879449e-07}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3859206540354749e-07}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.093985516597534e-05, "res": {"No": 0.9999884821053314, "Yes": 1.093985516597534e-05}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.274541067320155e-08}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.491059954562009e-08}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00247879357217336, "res": {"No": 0.9975209968160558, "Yes": 0.00247879357217336}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999722707254635, "res": {"Yes": 0.9999722707254635, "No": 2.7013927273701427e-05}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 6.681219681712522e-06}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 1.9603355370405663e-07}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 2.974016981369328e-06}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.998807390025206, "res": {"Yes": 0.998807390025206, "No": 0.0011914985242393698}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.020835196143857614, "res": {"No": 0.9791642737799681, "Yes": 0.020835196143857614}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 1.661531306520201e-07}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 2.1843986165509194e-06}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.4303456823368843e-07}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 7.66265593113209e-07}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.714727707948385e-07}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003716839357188652, "res": {"No": 0.9996279070137725, "Yes": 0.0003716839357188652}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.291810011081597e-06}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3769854468555555e-07}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999920181525664, "res": {"Yes": 0.999920181525664, "No": 7.91338369756188e-05}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.9298043894214894e-07}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927438082085472, "res": {"Yes": 0.9927438082085472, "No": 0.007255888012988649}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999453318073601, "res": {"Yes": 0.9999453318073601, "No": 5.431250841510684e-05}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995232081013999, "res": {"Yes": 0.9995232081013999, "No": 0.0004766303095303361}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999214625108432, "res": {"Yes": 0.999214625108432, "No": 0.000784967690207702}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999425902750013, "res": {"Yes": 0.9999425902750013, "No": 5.701358415581511e-05}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4163917986466603e-07}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.293362853284842e-08}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.8356727332041289, "res": {"Yes": 0.8356727332041289, "No": 0.16432537874203657}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.41349657728742e-06}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.511702729506509e-07}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.20426366715331e-08}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.30188093480798e-07}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4634924246633686e-07}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996541077345276, "res": {"Yes": 0.9996541077345276, "No": 0.0003453072353429463}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.667435838766081e-07}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0527940720887968e-07}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.2360495719293832e-05}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6340795034205968e-07}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.3340732141895027e-08}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.803745096559537e-06, "res": {"No": 0.9999968263007362, "Yes": 2.803745096559537e-06}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.9737731836645275e-08}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.451599684535306e-08}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9672034303564824, "res": {"Yes": 0.9672034303564824, "No": 0.032796138473144484}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.242389499933609e-06}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.974689185163284e-06}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3131799663846704e-07}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 7.539732801705592e-08}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 4.470443635241136e-07}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003759822237344256, "res": {"No": 0.9962399013416046, "Yes": 0.003759822237344256}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8926909880576106, "res": {"Yes": 0.8926909880576106, "No": 0.10730832995209065}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.7422626655446681e-07}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, " Yes": 7.866180738754887e-07}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 2.183020898811525e-07}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.004629867634285014, "res": {"No": 0.995370109141923, "Yes": 0.004629867634285014}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981904407301968, "res": {"Yes": 0.9981904407301968, "No": 0.0018087492403587664}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980291792226901, "res": {"Yes": 0.9980291792226901, "No": 0.0019703383494301097}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9895886423308354, "res": {"Yes": 0.9895886423308354, "No": 0.01041054546034819}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926066065976497, "res": {"Yes": 0.9926066065976497, "No": 0.007391851752285821}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.750590573467699e-06, "res": {"No": 0.9999938462231346, "Yes": 5.750590573467699e-06}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.069429307383347e-07}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.3811681836297402e-07}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.2647843722849105e-07}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.586777665871829e-08}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.0567160451834506e-06}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999781115595554, "res": {"Yes": 0.9999781115595554, "No": 2.18479819490341e-05}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.4148642596224184e-08}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1126957236537632e-07}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.699717331016106e-08}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.246051384083107e-05, "res": {"No": 0.999957132395842, "Yes": 4.246051384083107e-05}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.003842577167979089, "res": {"No": 0.9961573273350881, "Yes": 0.003842577167979089}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.973611810140297, "res": {"Yes": 0.973611810140297, "No": 0.026384541224893657}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2324778448082318e-07}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998914561513098, "res": {"Yes": 0.9998914561513098, "No": 0.0001079782219267826}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7490604882855703, "res": {"Yes": 0.7490604882855703, "No": 0.2509382966879422}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.2630514235814855e-06}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989934218400078, "res": {"Yes": 0.9989934218400078, "No": 0.0010056678809830717}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.536369304963555e-07}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.70530933711192e-07}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.374126576963476e-05, "res": {"No": 0.999926022056503, "Yes": 7.374126576963476e-05}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.2656563560405666, "res": {"No": 0.7343433943915663, "Yes": 0.2656563560405666}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.994298135764156e-08}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.058327330745036e-08}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.149691160035883e-08}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997992214587097, "res": {"Yes": 0.9997992214587097, "No": 0.00020052051444854996}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.3041278001735599e-05}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.823535531806116e-08}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.422808380126246e-07}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.694959712002022e-07}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9748376175082732, "res": {"Yes": 0.9748376175082732, "No": 0.025161407669668602}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906309585445696, "res": {"Yes": 0.9906309585445696, "No": 0.009368404263091655}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 5.908522167879407e-08}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 9.173737148275381e-08}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2539149926508878e-07}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.5415641117327285e-06}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.3916500994455978e-06}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.328377015440406e-05}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998313968351115, "res": {"Yes": 0.9998313968351115, "No": 0.0001679013421779475}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.3610610208433228e-06}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0029250777333713e-07, "res": {"No": 0.9999995679800934, "Yes": 2.0029250777333713e-07}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.3927469513283288e-06, "res": {"No": 0.9999973031140366, "Yes": 2.3927469513283288e-06}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.323966094705105e-06}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9926775477489489, "res": {"Yes": 0.9926775477489489, "No": 0.0073219946130901065}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.762940316836144e-07}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.4911618153529924e-06, "res": {"No": 0.9999980183344636, "Yes": 1.4911618153529924e-06}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997977913069341, "res": {"Yes": 0.9997977913069341, "No": 0.00020165787189308851}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 5.490671481472236e-06}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.0098732310613157e-07}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, " Yes": 2.579219028448709e-07}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00013697296327083082, "res": {"No": 0.9998626197375327, "Yes": 0.00013697296327083082}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996307668233536, "res": {"Yes": 0.9996307668233536, "No": 0.00036877284488451913}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.928788719298674e-08}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.941957143783937e-08}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.3706605833467974e-08}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0028134094604691903, "res": {"No": 0.9971862496369251, "Yes": 0.0028134094604691903}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924292727230798, "res": {"Yes": 0.9924292727230798, "No": 0.007569953910443447}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4830449140023189, "res": {"No": 0.5169540740728642, "Yes": 0.4830449140023189}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999779923581718, "res": {"Yes": 0.9999779923581718, "No": 2.1576301464714874e-05}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 5.604226571439284e-06}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.6577286360098192, "res": {"Yes": 0.6577286360098192, "No": 0.3422706775945576}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 7.594260343054627e-08}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.791993591956357e-08}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 1.9620092151846435e-06}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.013346473822292e-07}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.2969372949768911e-06, "res": {"No": 0.9999975415208221, "Yes": 1.2969372949768911e-06}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.842469133727436e-07}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.9065500758054894e-07}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.067851314450099e-06}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.051153590478252e-07}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4219903682339667e-06}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.239591223436685e-08}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.74092064894314e-08}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 8.825873322363029e-06}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3545206812264176e-07}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.3832038862210588e-05, "res": {"No": 0.9999858596579756, "Yes": 1.3832038862210588e-05}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997188003440366, "res": {"Yes": 0.9997188003440366, "No": 0.0002808912427571192}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.812036385036161e-06}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.92548827178622e-08}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 5.7686076560232146e-08}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.040490174258046e-08}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.8697068445527537e-07}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995577466228676, "res": {"Yes": 0.9995577466228676, "No": 0.00044164582411879616}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.876929711004399e-08}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0621866396816049e-07}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.616445554394021e-07, "res": {"No": 0.9999989719621284, "Yes": 8.616445554394021e-07}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.996784213553159, "res": {"Yes": 0.996784213553159, "No": 0.003215530265680411}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.0178339840055306e-07}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.0073807120997766e-08}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.6490225920729783e-08}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.4792920828701104e-07}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3660070805510506, "res": {"No": 0.6339928767614901, "Yes": 0.3660070805510506}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2504764653397577e-07}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.841239054091584e-08}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.858315711007791e-08}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.05310347023737841, "res": {"No": 0.9468961849406566, "Yes": 0.05310347023737841}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996194467087797, "res": {"Yes": 0.9996194467087797, "No": 0.0003802525198585069}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2279501760510244, "res": {"No": 0.7720491231178924, "Yes": 0.2279501760510244}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.350626966021991e-08}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.4963960113098442e-06}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.807903433718054e-07}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9758667885750364, "res": {"Yes": 0.9758667885750364, "No": 0.024132833012493252}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.624588241794478e-08}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.432369173759303e-08}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.802728620665192e-08}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.001279550672228652, "res": {"No": 0.9987201052648164, "Yes": 0.001279550672228652}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999950814892627, "res": {"Yes": 0.999950814892627, "No": 4.88978563435283e-05}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2144771451446485e-07}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.839110315018292e-07}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998374752085851, "res": {"Yes": 0.9998374752085851, "No": 0.00016215757666996456}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.09414376972435558, "res": {"No": 0.9058558242357557, "Yes": 0.09414376972435558}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998897874838456, "res": {"Yes": 0.9998897874838456, "No": 0.0001095325272836993}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.217363738918324e-07}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.4902524120346136e-06}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0039473282231946e-06}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.4258878039563593e-07}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.7117293230558156e-06}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.1065553079944074e-06}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 6.400765873614771e-07}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.5606613320575608e-06}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9939554018363407, "res": {"Yes": 0.9939554018363407, "No": 0.006044158563830934}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.8078486694912475e-06, "res": {"No": 0.9999980183344636, "Yes": 1.8078486694912475e-06}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.326199323144671e-08}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.613803229498291e-08}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0171998946104489e-07}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952686598203005, "res": {"Yes": 0.9952686598203005, "No": 0.004731194364510464}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997677621813781, "res": {"Yes": 0.9997677621813781, "No": 0.0002315421795395049}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1368879384706364e-07}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.222450138353502e-08}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.026952586828993222, "res": {"No": 0.9730470211627713, "Yes": 0.026952586828993222}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.3526093218883387e-06, "res": {"No": 0.9999983759447187, "Yes": 1.3526093218883387e-06}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979046340506671, "res": {"Yes": 0.9979046340506671, "No": 0.0020948498475299423}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.74631885407447e-08}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.6084977787310746e-06}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.7287298428477343e-07}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.201749787490224e-07}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.423099626430826e-06}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.911890416646912e-08}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.0940305743998597e-07}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.026998494443553e-05, "res": {"No": 0.9999893165220688, "Yes": 1.026998494443553e-05}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.2485854354386066e-07}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.235305963399536e-08}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.2906454894240353e-07}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.774385428280663e-07}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999977753956447, "res": {"Yes": 0.999977753956447, "No": 2.180713168837111e-05}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9811619788420365, "res": {"Yes": 0.9811619788420365, "No": 0.018835121257071544}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.400604445567629e-06}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.669417157396373e-06}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 6.344617045034437e-08}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.5538164015256415e-07}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.14828885268399059, "res": {"No": 0.8517108906170844, "Yes": 0.14828885268399059}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.7425172165074956e-08}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999785883642324, "res": {"Yes": 0.9999785883642324, "No": 2.096846322898115e-05}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.421273136553739e-08}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.996959772280629, "res": {"Yes": 0.996959772280629, "No": 0.0030395661122011156}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.0187319330215647e-07}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.895046345932013e-08}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.6658376291811054e-08}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.4009023405477055e-06}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1169743516033167e-06, "res": {"No": 0.9999981375378344, "Yes": 1.1169743516033167e-06}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920362202649138, "res": {"Yes": 0.9920362202649138, "No": 0.007962900190049004}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973777281771199, "res": {"Yes": 0.9973777281771199, "No": 0.0026219184403942467}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 9.067628322149986e-08}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.0022400631652577e-06}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8580217673021135, "res": {"Yes": 0.8580217673021135, "No": 0.14197739843497512}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993403353335528, "res": {"Yes": 0.993403353335528, "No": 0.006596190457532087}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 7.952240007737275e-07}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0640375173427804e-07}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.439685164218728e-05, "res": {"No": 0.9999548676244963, "Yes": 4.439685164218728e-05}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.596144114627442e-08}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7889371113717648e-07}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1493826276165056e-07}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.8148492866102346e-08}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9683795736750452, "res": {"Yes": 0.9683795736750452, "No": 0.03161835097149075}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999044480253807, "res": {"Yes": 0.9999044480253807, "No": 9.534954479235324e-05}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.841578209655829e-08}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.763400147164032e-08}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 7.013548258479933e-08}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.905625711606964e-07, "res": {"No": 0.9999992103693117, "Yes": 4.905625711606964e-07}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 5.421961503296035e-07}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.999125214437062e-07}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.58622724760728e-08}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4060447924566904e-07}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999512916842885, "res": {"Yes": 0.9999512916842885, "No": 4.805026055413652e-05}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1944103718747978e-07}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.057618547911947e-08}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.397559893077275e-07}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 9.173562848925429e-08}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997995789969731, "res": {"Yes": 0.9997995789969731, "No": 0.00020003704171304515}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981633666789508, "res": {"Yes": 0.9981633666789508, "No": 0.0018359705999705215}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 6.727278314628363e-07}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.17826734968192e-07}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999829988145218, "res": {"Yes": 0.9999829988145218, "No": 1.652634142548582e-05}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999988005296937, "res": {"Yes": 0.999988005296937, "No": 1.1733882044095624e-05}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.7048745971370144e-06}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 4.7030237407823334e-07}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.936869026026282e-08}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9782153978469518, "res": {"Yes": 0.9782153978469518, "No": 0.021784004871135566}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8802067992237101, "res": {"Yes": 0.8802067992237101, "No": 0.11979246401446889}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.189187691644317e-06}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.9486009449722712e-07}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999845484373766, "res": {"Yes": 0.9999845484373766, "No": 1.494656575351751e-05}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999897933310884, "res": {"Yes": 0.9999897933310884, "No": 9.83804052862098e-06}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999570495863321, "res": {"Yes": 0.999570495863321, "No": 0.00042884950704914917}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.0256121470466007e-07}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.8332368060859011e-07}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "\"Yes": 7.296455949897989e-08}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999381800049346, "res": {"Yes": 0.9999381800049346, "No": 6.128233067787773e-05}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3329868430974867, "res": {"No": 0.6670125617736524, "Yes": 0.3329868430974867}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 1.7407910754266706e-07}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.8339110214029584e-08}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.672904112117633e-08}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.382117768616843e-06}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999105268429617, "res": {"Yes": 0.9999105268429617, "No": 8.832626877292427e-05}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.0332259120135432e-07}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2905663646257216e-08}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6104886640028984e-07}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1149031698579568e-06, "res": {"No": 0.9999984951481323, "Yes": 1.1149031698579568e-06}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0005292560059362441, "res": {"No": 0.9994685542173628, "Yes": 0.0005292560059362441}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.6703030181521615e-06}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991624891781283, "res": {"Yes": 0.9991624891781283, "No": 0.0008364080214572173}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.70305578735129e-05, "res": {"No": 0.9999516492769339, "Yes": 4.70305578735129e-05}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.147513824784988e-08}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994464052901277, "res": {"Yes": 0.9994464052901277, "No": 0.000552341910697948}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00885146408775768, "res": {"No": 0.9911476141521023, "Yes": 0.00885146408775768}, "ground_truth": 0}]